Upload 9 files
Browse files- README.md +112 -44
- adaptive_knowledge.py +486 -0
- agent.py +1902 -177
- map_graph.py +119 -0
- mcp_server.py +609 -138
- memory.py +257 -0
- prompts.py +537 -0
- spatial_memory.py +88 -0
- test_jericho.py +122 -0
README.md
CHANGED
|
@@ -1,59 +1,127 @@
|
|
| 1 |
-
|
| 2 |
-
title: Text Adventure Agent Submission
|
| 3 |
-
emoji: "\U0001F5FA"
|
| 4 |
-
colorFrom: green
|
| 5 |
-
colorTo: blue
|
| 6 |
-
sdk: gradio
|
| 7 |
-
sdk_version: "5.12.0"
|
| 8 |
-
app_file: app.py
|
| 9 |
-
pinned: false
|
| 10 |
-
license: mit
|
| 11 |
-
---
|
| 12 |
|
| 13 |
-
|
| 14 |
|
| 15 |
-
## Overview
|
| 16 |
|
| 17 |
-
|
| 18 |
|
| 19 |
-
|
| 20 |
|
| 21 |
-
|
| 22 |
|
| 23 |
-
|
| 24 |
-
- What tools did you implement in your MCP server?
|
| 25 |
-
- Any interesting techniques or optimizations?
|
| 26 |
|
| 27 |
-
## Files
|
| 28 |
|
| 29 |
-
|
| 30 |
-
|------|-------------|
|
| 31 |
-
| `agent.py` | ReAct agent with `StudentAgent` class |
|
| 32 |
-
| `mcp_server.py` | MCP server with game interaction tools |
|
| 33 |
-
| `app.py` | Gradio interface for HF Space |
|
| 34 |
-
| `requirements.txt` | Additional dependencies |
|
| 35 |
|
| 36 |
-
## How to Submit
|
| 37 |
|
| 38 |
-
|
| 39 |
-
2. Clone your fork locally
|
| 40 |
-
3. Implement your agent in `agent.py` and `mcp_server.py`
|
| 41 |
-
4. Test locally (see below)
|
| 42 |
-
5. Push your changes to your Space
|
| 43 |
-
6. Submit your Space URL on the course platform
|
| 44 |
|
| 45 |
-
## Local Testing
|
| 46 |
|
| 47 |
-
|
| 48 |
-
# Install dependencies
|
| 49 |
-
pip install -r requirements.txt
|
| 50 |
|
| 51 |
-
|
| 52 |
-
fastmcp dev mcp_server.py
|
| 53 |
|
| 54 |
-
|
| 55 |
-
python run_agent.py --agent . --game lostpig -v -n 20
|
| 56 |
|
| 57 |
-
#
|
| 58 |
-
|
| 59 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Zork Submission : MCP Agent
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
|
| 3 |
+
Ce projet présente un Agent ReAct couplé à un serveur MCP personnalisé pour jouer à des jeux d'aventures textuels (Zork, Lostpig). L'objectif est de dépasser les limites d'un LLM classique en enrichissant ses capacités d'observation, sa mémoire spatiale et sa stratégie décisionnelle.
|
| 4 |
|
|
|
|
| 5 |
|
| 6 |
+
## Architecture
|
| 7 |
|
| 8 |
+
L'architecture repose sur une séparation stricte entre la logique de jeu, serveur MCP, et la stratégie cognitive, l'Agent.
|
| 9 |
|
| 10 |
+
### Serveur MCP
|
| 11 |
|
| 12 |
+
Le serveur MCP a été conçu pour fournir à l'agent des outils de haute précision en s'appuyant sur l'API Jericho. Son rôle est d'exposer des « vérités absolues » sur l'état interne du jeu, permettant ainsi de fiabiliser les actions de l'agent.
|
|
|
|
|
|
|
| 13 |
|
|
|
|
| 14 |
|
| 15 |
+
Le serveur met à disposition plusieurs outils clés :
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
|
|
|
|
| 17 |
|
| 18 |
+
- **get_location_info** : Extrait des données structurées sur la position actuelle (ID et nom du lieu, inventaire des objets présents et leur hiérarchie).
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
|
|
|
|
| 20 |
|
| 21 |
+
- **get_valid_actions** : Utilise la fonction éponyme native de Jericho pour générer une liste d'actions valides et prêtes à l'emploi. Cela aide l'agent à choisir ou à construire des commandes syntaxiquement correctes.
|
|
|
|
|
|
|
| 22 |
|
| 23 |
+
- **play_action** : L'interface directe permettant à l'agent d'exécuter une commande et d'interagir avec l'univers du jeu.
|
|
|
|
| 24 |
|
| 25 |
+
D'autres outils ont été partiellement implémentés pour détecter les sorties, tester la grammaire des actions ou identifier les interactions possibles avec un objet spécifique. Bien que prometteurs pour affiner la précision de l'agent, ces modules n'ont pas été finalisés ou utilisés par manque de temps.
|
|
|
|
| 26 |
|
| 27 |
+
### Agent
|
| 28 |
+
|
| 29 |
+
L'agent repose sur une architecture ReAct, augmentée par plusieurs modules spécialisés. L'objectif est de transformer un agent purement réactif en une entité dotée d'une conscience sémantique et d'une mémoire spatiale, capable d'interpréter son environnement au-delà du simple texte.
|
| 30 |
+
|
| 31 |
+
#### Observation Extractor
|
| 32 |
+
|
| 33 |
+
Ce module est crucial pour la compréhension de l'environnement. Au lieu de se contenter de stocker le texte brut des réponses du jeu, l'extracteur fusionne les données brutes issues de l'API Jericho avec les descriptions textuelles pour produire une synthèse structurée.
|
| 34 |
+
|
| 35 |
+
- Synthèse de données : Il extrait et catégorise les informations essentielles (sorties visibles, entités, objets interactifs ou ramassables).
|
| 36 |
+
|
| 37 |
+
- Précision syntaxique : Il identifie les noms exacts des objets utilisables dans les commandes, réduisant ainsi les erreurs de syntaxe.
|
| 38 |
+
|
| 39 |
+
- Filtrage intelligent : Le but est de synthétiser au maximum les informations pour ne transmettre à l'agent que les données actionnables, évitant ainsi la surcharge cognitive du modèle.
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
#### Guidance Priority
|
| 43 |
+
|
| 44 |
+
Ce module agit comme le centre décisionnel de l'agent en hiérarchisant les tâches à accomplir. Il fournit un contexte riche et orienté vers l'action, structuré autour de plusieurs piliers :
|
| 45 |
+
|
| 46 |
+
- Contexte immédiat : L'agent reçoit systématiquement le feedback de sa dernière action, l'historique des interactions spécifiques à la pièce actuelle, ainsi qu'un état des lieux des objets environnants.
|
| 47 |
+
|
| 48 |
+
- Gestion des objets : Le module distingue les objets avec lesquels l'agent a déjà interagi (en précisant la dernière interaction) de ceux encore inexplorés. Cette distinction guide l'agent vers de nouvelles découvertes.
|
| 49 |
+
|
| 50 |
+
- Actions suggérées : En combinant les actions de base et les commandes validées par l'outil get_valid_actions, le module propose un catalogue d'options pertinentes pour faciliter la prise de décision.
|
| 51 |
+
|
| 52 |
+
- Régulation Comportementale :
|
| 53 |
+
|
| 54 |
+
- Contrôle de l'Exploration : Un compteur de temps passé dans une localisation est intégré. S'il devient trop élevé, l'agent est incité à se déplacer pour éviter de rester bloqué sur un objet ou une énigme.
|
| 55 |
+
|
| 56 |
+
- Dynamisme : Des priorités spécifiques sont injectées lors d'événements clés, comme l'entrée dans une nouvelle zone ou une modification majeure de l'état du monde, favorisant systématiquement l'exploration de nouveaux lieux.
|
| 57 |
+
|
| 58 |
+
#### World Mapper
|
| 59 |
+
|
| 60 |
+
Pour pallier l'absence de vision spatiale native des LLM, un module de cartographie a été intégré sous la forme d'un graphe dynamique. Ce module permet à l'agent de maintenir une représentation persistante et structurée de l'univers de jeu.
|
| 61 |
+
|
| 62 |
+
- Structure du graphe : La carte répertorie les localisations, les connexions entre elles, ainsi que les objets, puzzles et interactions spécifiques à chaque lieu.
|
| 63 |
+
|
| 64 |
+
- Mémoire des déplacements : À partir des observations textuelles et des actions effectuées, le module enregistre non seulement les chemins validés, mais aussi les sorties potentielles mentionnées dans les descriptions et celles bloquées.
|
| 65 |
+
|
| 66 |
+
- Analyse des connecteurs : Le système distingue les sorties confirmées des passages bloqués ou impossibles, permettant ainsi d'anticiper l'impact des futurs déplacements.
|
| 67 |
+
|
| 68 |
+
- Mise à jour temps réel : Le graphe est actualisé après chaque mouvement, observation ou changement d'état de l'environnement, garantissant que l'agent dispose toujours d'une vue d'ensemble fiable pour planifier ses actions.
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
#### Mémoire des Actions
|
| 72 |
+
|
| 73 |
+
Pour renforcer la cohérence temporelle, l'agent dispose d'une mémoire historique indexée par localisation. Ce module enregistre chaque commande, son résultat brut ainsi qu'un résumé synthétique de l'effet produit.
|
| 74 |
+
|
| 75 |
+
- Récupération Contextuelle : Lors de la génération des directives (guidances), le système interroge cette mémoire pour extraire soit les 10 dernières actions effectuées, soit les interactions spécifiques liées à un objet précis.
|
| 76 |
+
|
| 77 |
+
- Utilité : Cela permet à l'agent de ne pas répéter indéfiniment des tentatives infructueuses et de s'appuyer sur ses succès passés pour progresser.
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
#### Critique de l'agent
|
| 81 |
+
|
| 82 |
+
Un module de critique a été implémenté pour évaluer la pertinence des décisions en temps réel. Après chaque action, un LLM secondaire analyse le résultat pour déterminer si l'objectif a été atteint ou non.
|
| 83 |
+
|
| 84 |
+
- Auto-correction : Ce module identifie explicitement les erreurs de syntaxe dans l'appel des outils ou les actions redondantes (ex: essayer d'ouvrir une porte déjà ouverte).
|
| 85 |
+
|
| 86 |
+
- Orientation stratégique : En explicitant les raisons d'un échec, la critique guide l'agent vers des alternatives plus logiques, transformant chaque erreur en une donnée d'apprentissage pour le tour suivant.
|
| 87 |
+
|
| 88 |
+
## Pistes d'Amélioration et Expérimentations
|
| 89 |
+
|
| 90 |
+
Cette section explicite les fonctionnalités testées puis écartées, ainsi que les perspectives.
|
| 91 |
+
|
| 92 |
+
|
| 93 |
+
#### Fonctionnalités écartées
|
| 94 |
+
Plusieurs approches basées sur l'autonomie totale du LLM ont été testées :
|
| 95 |
+
|
| 96 |
+
- Base de connaissances dynamique : J'ai tenté de laisser le LLM mettre à jour sa propre base de connaissances tous les 10 tours. Cependant, le modèle générait trop de "bruit" : informations verbeuses, répétitions malgré les consignes, et hallucinations sur l'état du monde. Cette surcharge d'informations inutiles polluait le prompt et dégradait les performances.
|
| 97 |
+
|
| 98 |
+
- Mémoire générative vs Mémoire factuelle : Une tentative de mémoire gérée par LLM a montré les mêmes limites (redondances et inventions). J'ai donc privilégié une mémoire basée strictement sur les actions et les sorties réelles, plus fiable pour la prise de décision.
|
| 99 |
+
|
| 100 |
+
#### Perspectives
|
| 101 |
+
|
| 102 |
+
Par manque de temps, certaines fonctionnalités structurantes n'ont pu être finalisées :
|
| 103 |
+
|
| 104 |
+
- Module Stratège : L'implémentation d'un module stratège capable d'analyser la carte et l'inventaire pour élaborer des plans à long terme (décomposition en sous-objectifs). Cela permettrait de passer d'un agent purement "réactif" à un agent véritablement "proactif".
|
| 105 |
+
|
| 106 |
+
- RAG (Retrieval-Augmented Generation) : L'intégration d'un système RAG pour interroger intelligemment la mémoire et la carte aurait permis d'affiner le contexte envoyé au LLM sans le surcharger.
|
| 107 |
+
|
| 108 |
+
- Critique Renforcée : Un module de critique plus directif pour recadrer l'agent de manière plus autoritaire en cas de boucle d'actions infructueuse.
|
| 109 |
+
|
| 110 |
+
## Analyse de performance
|
| 111 |
+
|
| 112 |
+
Le dépassement de la baseline reste un défi complexe et sujet à une certaine variabilité (runs, modèles, ajustement des prompts).
|
| 113 |
+
|
| 114 |
+
- Comportement exploratoire : L'agent actuel privilégie une stratégie d'exploration intensive. S'il parvient à résoudre des énigmes, c'est souvent par une interaction systématique avec les objets de son environnement plutôt que par un raisonnement logique déduit de l'énigme elle-même.
|
| 115 |
+
|
| 116 |
+
- Synergie agent-MCP : Bien que la théorie préconise une indépendance totale, l'agent est ici étroitement couplé au serveur MCP pour maximiser l'utilisation des "sources de vérité". Ce choix a été fait pour garantir une découverte optimale des lieux et des objets, le serveur MCP servant de socle de fiabilité pour toutes les interactions. De plus, dans l'exemple fourni, un couplage existait déjà : la première action, un appel à l'outil play_action, était codée en dur.
|
| 117 |
+
|
| 118 |
+
- Limites de raisonnement : L'agent manque encore d'une vision holistique de sa mission. Sa force réside dans sa capacité à cartographier et interagir, mais il ne possède pas encore le recul nécessaire pour prioriser des objectifs de haut niveau.
|
| 119 |
+
|
| 120 |
+
|
| 121 |
+
## Conclusion
|
| 122 |
+
|
| 123 |
+
Ce projet a permis de mettre en lumière la complexité de l'orchestration entre un LLM et un environnement dynamique via le protocole MCP.
|
| 124 |
+
|
| 125 |
+
La leçon principale de ce travail est que les capacités intrinsèques du LLM sont secondaires par rapport à la qualité de l'architecture logicielle qui l'entoure (contexte, mémoire, modules déterministes). Au cours du développement, j'ai compris que confier des tâches trop complexes ou trop libres à l'agent mène souvent à l'échec.
|
| 126 |
+
|
| 127 |
+
La clé du succès réside dans la création d'un cadre déterministe rigoureux : il est préférable de restreindre l'agent à des tâches simples et bien encadrées, soutenues par un contexte précis, plutôt que de compter sur une autonomie totale. En résumé, l'intelligence du système ne provient pas uniquement du modèle de langage, mais de la structure qui guide et valide chacune de ses actions.
|
adaptive_knowledge.py
ADDED
|
@@ -0,0 +1,486 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
adaptive_knowledge.py
|
| 3 |
+
Système complet de gestion des connaissances adaptatives pour l'Agent ReAct.
|
| 4 |
+
Gère l'extraction, l'analyse, la qualité et la génération de la base de connaissances.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import os
|
| 8 |
+
import re
|
| 9 |
+
import json
|
| 10 |
+
from typing import Dict, List, Optional, Tuple
|
| 11 |
+
|
| 12 |
+
# =============================================================================
|
| 13 |
+
# 1. UTILITAIRES DE SECTIONS
|
| 14 |
+
# =============================================================================
|
| 15 |
+
class SectionUtils:
|
| 16 |
+
"""
|
| 17 |
+
Utilitaires de section afin d'extraire des sections de markdown
|
| 18 |
+
"""
|
| 19 |
+
@staticmethod
|
| 20 |
+
def extract_section_content(content: str, section_name: str) -> str:
|
| 21 |
+
if not content: return ""
|
| 22 |
+
|
| 23 |
+
# On normalise le nom recherché pour la comparaison
|
| 24 |
+
# "LOCATION: Cave With Stream" -> "LOCATION: CAVE WITH STREAM"
|
| 25 |
+
target = section_name.strip().upper()
|
| 26 |
+
|
| 27 |
+
lines = content.split('\n')
|
| 28 |
+
section_data = []
|
| 29 |
+
found = False
|
| 30 |
+
|
| 31 |
+
for line in lines:
|
| 32 |
+
clean_line = line.strip().upper()
|
| 33 |
+
|
| 34 |
+
# Détection du header (on vérifie si la ligne contient le nom de la section)
|
| 35 |
+
# Cela règle le problème des "## LOCATION: O\nutside"
|
| 36 |
+
if clean_line.startswith("##") and target in clean_line:
|
| 37 |
+
found = True
|
| 38 |
+
continue
|
| 39 |
+
|
| 40 |
+
if found:
|
| 41 |
+
# Si on croise un autre header, on s'arrête
|
| 42 |
+
if clean_line.startswith("##"):
|
| 43 |
+
break
|
| 44 |
+
section_data.append(line)
|
| 45 |
+
|
| 46 |
+
return "\n".join(section_data).strip() if found else ""
|
| 47 |
+
|
| 48 |
+
@staticmethod
|
| 49 |
+
def update_section_content(content: str, section_name: str, new_content: str) -> str:
|
| 50 |
+
"""
|
| 51 |
+
Mettre à jour une section de connaissance
|
| 52 |
+
"""
|
| 53 |
+
if not content: content = "# Zork Strategic Knowledge Base\n\n"
|
| 54 |
+
section_header = f"## {section_name}"
|
| 55 |
+
pattern = rf"## {re.escape(section_name)}(.*?)(?=\n## |$)"
|
| 56 |
+
match = re.search(pattern, content, re.DOTALL)
|
| 57 |
+
|
| 58 |
+
# Ajout de la nouvelle section
|
| 59 |
+
full_new_section = f"{section_header}\n\n{new_content}\n"
|
| 60 |
+
|
| 61 |
+
# Si on a déjà une section on remplace sinon ajoute
|
| 62 |
+
if match:
|
| 63 |
+
return content.replace(match.group(0), full_new_section, 1)
|
| 64 |
+
else:
|
| 65 |
+
return f"{content}\n\n{full_new_section}\n"
|
| 66 |
+
|
| 67 |
+
@staticmethod
|
| 68 |
+
def extract_cross_episode_section(content: str) -> str:
|
| 69 |
+
"""Extrait la section 'Wisdom' qui doit persister entre les parties."""
|
| 70 |
+
return SectionUtils.extract_section_content(content, "CROSS-EPISODE INSIGHTS")
|
| 71 |
+
|
| 72 |
+
# =============================================================================
|
| 73 |
+
# 2. ANALYSEUR DE TOURS (Turn Extraction & Quality)
|
| 74 |
+
# =============================================================================
|
| 75 |
+
class TurnAnalyzer:
    """Turns a window of the agent's raw history into structured data for analysis."""

    @staticmethod
    def format_turn_window(history: list, start_index: int, end_index: int) -> Dict:
        """Convert history[start_index:end_index] into a structured dict.

        Each history step is assumed to be a dict with the keys 'thought',
        'tool', 'args', 'result', 'score' and (optionally) 'location'
        — TODO confirm against the agent's self.history producer.

        Returns a dict with 'actions', 'death_events' and 'score_changes'.
        """
        window = history[start_index:end_index]

        turn_data: Dict = {
            "start_turn": start_index + 1,
            "end_turn": end_index,
            "actions": [],
            "death_events": [],
            "score_changes": [],
        }

        # Baseline score is the score right before the window starts.
        prev_score = history[start_index - 1].get("score", 0) if start_index > 0 else 0

        for i, step in enumerate(window):
            # 1-based turn number of this window element.
            turn_num = start_index + 1 + i
            action_repr = step.get("tool", "") + str(step.get("args", ""))

            # Action data.
            turn_data["actions"].append({
                "turn": turn_num,
                "thought": step.get("thought", ""),
                "action": action_repr,
                "result": step.get("result", ""),
            })

            # Score tracking.
            # BUGFIX: the original referenced an undefined name
            # 'current_location', raising NameError on any score change;
            # the location now comes from the step itself.
            current_score = step.get("score", 0)
            if current_score != prev_score:
                turn_data["score_changes"].append({
                    "location": step.get("location", "Unknown"),
                    "action": action_repr,
                    "explanation": step.get("result", ""),
                    "turn": turn_num,
                    "from": prev_score,
                    "to": current_score,
                })
                prev_score = current_score

            # Death detection from the game's textual feedback.
            result_lower = step.get("result", "").lower()
            if "game over" in result_lower or "died" in result_lower:
                turn_data["death_events"].append({
                    "turn": turn_num,
                    "reason": step.get("result", "")[:100],  # Short preview
                    "context": step.get("thought", ""),
                })

        return turn_data

    @staticmethod
    def check_quality(turn_data: Dict) -> Tuple[bool, str]:
        """Decide whether this turn window is worth an LLM analysis pass."""
        actions = turn_data["actions"]

        # Always analyze score changes or deaths.
        if turn_data["death_events"]:
            return True, "Death event detected"
        if turn_data["score_changes"]:
            return True, "Score progression detected"

        # Require a minimum amount of activity.
        if len(actions) < 2:
            return False, "Too few actions"

        # Variety check: skip analysis when the agent is looping.
        unique_actions = {a["action"] for a in actions}
        variety_ratio = len(unique_actions) / len(actions)

        if variety_ratio < 0.2:
            return False, f"Low variety ({variety_ratio:.1%}) - repetitive behavior"

        return True, "Sufficient quality data"
|
| 152 |
+
|
| 153 |
+
# =============================================================================
|
| 154 |
+
# 3. GÉNÉRATEUR DE CONNAISSANCES (LLM Prompts)
|
| 155 |
+
# =============================================================================
|
| 156 |
+
class KnowledgeGenerator:
|
| 157 |
+
|
| 158 |
+
GLOBAL_RULES = """
|
| 159 |
+
- Player is a pragmatic adventurer.
|
| 160 |
+
- Movement: use cardinal directions (n, s, e, w, u, d).
|
| 161 |
+
- Interaction: You must 'examine' objects to find details.
|
| 162 |
+
- Combat: player prefers to 'hit' or 'smash' threats.
|
| 163 |
+
"""
|
| 164 |
+
|
| 165 |
+
|
| 166 |
+
SYSTEM_PROMPT = """
|
| 167 |
+
|
| 168 |
+
You are the Strategic Analyst for a text game Agent.
|
| 169 |
+
Your goal is to update the 'Strategic Knowledge Base' based on recent gameplay logs.
|
| 170 |
+
|
| 171 |
+
ARCHITECTURAL RULES:
|
| 172 |
+
1. **Universal Scope**: Focus on mechanics that apply ANYWHERE (e.g., "Darkness kills without light").
|
| 173 |
+
2. **Avoid Specifics**: Do NOT write "Go North from House". Write "Explore cardinal directions systematically".
|
| 174 |
+
3. **Analyze Failure**: If the agent died, identify the CAUSE and the PRINCIPLE to avoid it.
|
| 175 |
+
|
| 176 |
+
OUTPUT SECTIONS REQUIRED:
|
| 177 |
+
- **UNIVERSAL GAME MECHANICS**: Rules of physics/parser (e.g., "Containers must be opened").
|
| 178 |
+
- **DANGER CATEGORIES**: Types of threats (e.g., "Trolls require weapons").
|
| 179 |
+
- **STRATEGIC PRINCIPLES**: Heuristics for decision making.
|
| 180 |
+
- **DEATH & DANGER ANALYSIS**: Specific analysis of recent deaths.
|
| 181 |
+
|
| 182 |
+
Preserve existing 'CROSS-EPISODE INSIGHTS' if provided.
|
| 183 |
+
"""
|
| 184 |
+
|
| 185 |
+
KNOWLEDGE_DELTA_PROMPT = """You are the Knowledge Analyst for a text game Agent.
|
| 186 |
+
Your goal is to extract strictly **ABSOLUTE RULES** and **STRATEGIC LESSONS**.
|
| 187 |
+
|
| 188 |
+
Do NOT:
|
| 189 |
+
- Do not list exits or room descriptions (this is handled by Memory).
|
| 190 |
+
- Do not list items in the room.
|
| 191 |
+
|
| 192 |
+
DO IDENTIFY:
|
| 193 |
+
- Universal Mechanics: "Light is required in dark places", "Glass breaks when hit".
|
| 194 |
+
- Danger Patterns: "Falling from heights is fatal", "Water traps require swimming".
|
| 195 |
+
- High-Level Strategies: "Examine objects twice", "Save heavy items for later".
|
| 196 |
+
|
| 197 |
+
INPUTS:
|
| 198 |
+
1. EXISTING KNOWLEDGE: What we already know (do NOT repeat this).
|
| 199 |
+
2. NEW LOGS: Recent gameplay history and ON CURRENT LOCATION
|
| 200 |
+
|
| 201 |
+
INSTRUCTIONS:
|
| 202 |
+
1. Compare the Logs against Existing Knowledge.
|
| 203 |
+
2. Identify **ONLY** new information:
|
| 204 |
+
- New mechanics discovered.
|
| 205 |
+
- New map connections or object interactions.
|
| 206 |
+
4. **SILENCE**: If nothing new happened, output "NO_UPDATES".
|
| 207 |
+
5. ONLY output insights that are NOT already in the EXISTING KNOWLEDGE.
|
| 208 |
+
6. Be concise: One bullet point per new discovery.
|
| 209 |
+
7. DO NOT repeat the headers if you have no data for them.
|
| 210 |
+
|
| 211 |
+
CRITICAL CONSTRAINTS:
|
| 212 |
+
1. NO SURVIVAL TALK: GAME is not about food, water, or fatigue. Ignore these.
|
| 213 |
+
2. NO REPETITION: If an idea is already in 'EXISTING KNOWLEDGE', skip it.
|
| 214 |
+
3. BE TECHNICAL: Focus on "Item X opens Door Y" or "Light is needed in Dark rooms".
|
| 215 |
+
4. NO SPECULATION: Do not invent dangers (like hostile pigs) unless the logs explicitly show the agent taking damage or dying.
|
| 216 |
+
|
| 217 |
+
|
| 218 |
+
OUTPUT FORMAT (Markdown):
|
| 219 |
+
Only output the sections that have NEW content. Do not output empty sections.
|
| 220 |
+
|
| 221 |
+
## UNIVERSAL MECHANICS (New technical rules only)
|
| 222 |
+
- [New rule found]
|
| 223 |
+
|
| 224 |
+
## STRATEGIC LESSONS and STRATEGIC TIPS (New navigation/interaction heuristics)
|
| 225 |
+
- [Strategy]
|
| 226 |
+
- [New strategy found]
|
| 227 |
+
|
| 228 |
+
## DANGER ANALYSIS
|
| 229 |
+
- [Analysis of what killed player or blocked him]
|
| 230 |
+
|
| 231 |
+
## SCORE EVENTS
|
| 232 |
+
- [Action] in [Location] increased score because [Reason].
|
| 233 |
+
|
| 234 |
+
"""
|
| 235 |
+
|
| 236 |
+
@staticmethod
|
| 237 |
+
def build_prompt(turn_data: Dict, existing_knowledge: str) -> str:
|
| 238 |
+
|
| 239 |
+
logs_str = ""
|
| 240 |
+
for a in turn_data["actions"]:
|
| 241 |
+
logs_str += f"Turn {a['turn']}:\n"
|
| 242 |
+
logs_str += f" Thought: {a['thought']}\n"
|
| 243 |
+
logs_str += f" Action: {a['action']}\n"
|
| 244 |
+
logs_str += f" Result: {a['result'][:400]}...\n\n" # Truncate
|
| 245 |
+
|
| 246 |
+
# Events summary
|
| 247 |
+
events_str = ""
|
| 248 |
+
if turn_data["death_events"]:
|
| 249 |
+
events_str += f"*** DEATHS: {len(turn_data['death_events'])} ***\n"
|
| 250 |
+
if turn_data["score_changes"]:
|
| 251 |
+
events_str += f"*** SCORE CHANGES: {len(turn_data['score_changes'])} ***\n"
|
| 252 |
+
|
| 253 |
+
return f"""
|
| 254 |
+
ANALYZE THIS GAMEPLAY WINDOW (Turns {turn_data['start_turn']}-{turn_data['end_turn']}):
|
| 255 |
+
|
| 256 |
+
EVENTS:
|
| 257 |
+
{events_str}
|
| 258 |
+
|
| 259 |
+
LOGS:
|
| 260 |
+
{logs_str}
|
| 261 |
+
|
| 262 |
+
EXISTING KNOWLEDGE BASE:
|
| 263 |
+
------------------------
|
| 264 |
+
{existing_knowledge}
|
| 265 |
+
------------------------
|
| 266 |
+
|
| 267 |
+
INSTRUCTIONS:
|
| 268 |
+
Update the Knowledge Base.
|
| 269 |
+
1. Incorporate lessons from the logs above.
|
| 270 |
+
3. Remove duplicates.
|
| 271 |
+
4. Output ONLY the NEW additions in Markdown format.
|
| 272 |
+
"""
|
| 273 |
+
|
| 274 |
+
# =============================================================================
|
| 275 |
+
# 4. LE MANAGER PRINCIPAL (L'Orchestrateur)
|
| 276 |
+
# =============================================================================
|
| 277 |
+
class AdaptiveKnowledgeManager:
    """Orchestrates the persistent strategic knowledge base.

    Loads the markdown knowledge file, formats its strategic sections for
    prompt injection, and runs LLM-driven update passes over windows of the
    agent's history.
    """

    def __init__(self, output_file: str = "knowledgebase.md"):
        # Path of the markdown file the knowledge base is persisted to.
        self.output_file = output_file

    def get_strategic_knowledge(self) -> str:
        """Return the formatted strategic-knowledge prompt block, or '' if empty.

        BUGFIX: the original signature annotated `self` as `str`
        (`def get_strategic_knowledge(self:str)`), which was meaningless.
        """
        full_kb = self.load_knowledge()
        if not full_kb:
            return ""

        universal = SectionUtils.extract_section_content(full_kb, "UNIVERSAL MECHANICS")
        lessons = SectionUtils.extract_section_content(full_kb, "STRATEGIC LESSONS")
        dangers = SectionUtils.extract_section_content(full_kb, "DANGER ANALYSIS")
        # NOTE: "SCORE EVENTS" was extracted but never rendered in the original;
        # the unused lookup has been dropped.

        # extract_section_content already returns "" for missing sections,
        # so the universal/lessons placeholders need no extra fallback.
        return f"""
### 📜 WORLD RULES
{KnowledgeGenerator.GLOBAL_RULES}

### 🧠 EVOLVED STRATEGIES
{universal}
{lessons}

### ⚠️ LETHAL LESSONS
{dangers if dangers else "No fatal errors recorded yet."}
"""

    def load_knowledge(self) -> str:
        """Read the knowledge-base file; return '' when it does not exist."""
        if os.path.exists(self.output_file):
            with open(self.output_file, "r", encoding="utf-8") as f:
                return f.read()
        return ""

    def update_knowledge(self, history: list, start_idx: int, end_idx: int, llm_function) -> bool:
        """Run one knowledge-update pass over history[start_idx:end_idx].

        Args:
            history: The agent's self.history list.
            start_idx: Window start (inclusive).
            end_idx: Window end (exclusive).
            llm_function: The agent's call_llm(prompt=..., system_prompt=..., seed=...) callable.

        Returns:
            True when new insights were persisted to disk, False otherwise.
        """
        print(f"\n[KNOWLEDGE] Assessing turns {start_idx+1}-{end_idx}...")

        # 1. Extract and format the turn window.
        turn_data = TurnAnalyzer.format_turn_window(history, start_idx, end_idx)

        # 2. Quality gate: is this window worth an LLM call at all?
        should_update, reason = TurnAnalyzer.check_quality(turn_data)
        if not should_update:
            print(f"[KNOWLEDGE] Skipping update: {reason}")
            return False

        # 3. Load the existing knowledge base.
        current_knowledge = self.load_knowledge()

        # 4. Build the delta prompt.
        prompt = KnowledgeGenerator.build_prompt(turn_data, current_knowledge)

        # 5. LLM call (generation).
        print(f"[KNOWLEDGE] Generating insights (Reason: {reason})...")
        try:
            delta_response = llm_function(
                prompt=prompt,
                system_prompt=KnowledgeGenerator.KNOWLEDGE_DELTA_PROMPT,
                seed=42,  # fixed seed for reproducible analyses
            )

            if "NO_UPDATES" not in delta_response and len(delta_response) > 10:
                # Merge the textual delta into the existing knowledge base.
                new_full_knowledge = SmartKnowledgeMerger.merge_delta(current_knowledge, delta_response)

                # Persist only when the merge actually changed something.
                if new_full_knowledge != current_knowledge:
                    with open(self.output_file, "w", encoding="utf-8") as f:
                        f.write(new_full_knowledge)
                    print(f"[KNOWLEDGE] Added new insights to database. (+{len(delta_response)} chars)")
                    return True
        except Exception as e:
            # Never let a knowledge-update failure crash the agent loop.
            print(f"[KNOWLEDGE] Error during generation: {e}")

        print("[KNOWLEDGE] No new insights found.")
        return False
|
| 365 |
+
|
| 366 |
+
|
| 367 |
+
import re
|
| 368 |
+
|
| 369 |
+
class SmartKnowledgeMerger:
    """Merge LLM-generated knowledge deltas into a markdown knowledge base.

    The knowledge base is organised into top-level ``## SECTION`` headers whose
    bodies are bullet lists.  A delta uses the same layout; merging appends only
    genuinely new bullet points and never reorders or deletes existing content.
    """

    @staticmethod
    def merge_delta(existing_content: str, delta_content: str) -> str:
        """Merge *delta_content* into *existing_content* and return the result.

        Args:
            existing_content: Current knowledge base text (may be empty).
            delta_content: New markdown snippet produced by the LLM.

        Returns:
            The merged knowledge base.  *existing_content* is returned
            unchanged when the delta is empty or contains the ``NO_UPDATES``
            sentinel.
        """
        if "NO_UPDATES" in delta_content or not delta_content.strip():
            return existing_content

        if not existing_content:
            # First ever delta: bootstrap the knowledge base around it.
            return "# Zork Strategic Knowledge Base\n\n" + delta_content

        updated_content = existing_content

        # Split the delta on its top-level headers (## HEADER); the capturing
        # group keeps the headers themselves as separate list items.
        # BUGFIX: the original class [A-Z :-_] contained the accidental
        # character *range* ':-_' (ASCII ':' .. '_'), which silently matched
        # ';<=>?@[\]^' too; the intent was the literal set ':', '_', '-'.
        sections = re.split(r'(## [A-Z :_\-]+)', delta_content)

        current_header = None
        for part in sections:
            part = part.strip()
            if not part:
                continue

            if part.startswith("##"):
                current_header = part.replace("## ", "")
            elif current_header:
                # BUGFIX: "## LOCATION ..." sections used to be routed to a
                # _merge_location_sheet helper that had been commented out,
                # so any location delta raised AttributeError.  Until a
                # field-aware location merger is reinstated, location sheets
                # go through the generic append path like every other section.
                updated_content = SmartKnowledgeMerger._append_to_section(
                    updated_content, current_header, part
                )

        return updated_content

    @staticmethod
    def _append_to_section(full_text: str, section_name: str, text_to_add: str) -> str:
        """Append the bullet lines of *text_to_add* to section *section_name*.

        The section is created at the end of *full_text* when it does not
        exist.  Candidate lines are dropped when they duplicate an existing
        line exactly, case-insensitively, or fuzzily (>70% overlap of their
        significant words with one existing line).
        """
        # Keep only bullet lines; the LLM sometimes wraps them in prose.
        new_lines = [l.strip() for l in text_to_add.strip().split('\n') if l.strip().startswith(('-', '*'))]

        # Locate the section: its header plus everything up to the next header.
        pattern = rf"(## {re.escape(section_name)})(.*?)(?=\n## |$)"
        match = re.search(pattern, full_text, re.DOTALL)

        if not match:
            # Section does not exist yet: create it at the end of the file.
            return f"{full_text.strip()}\n\n## {section_name}\n{text_to_add}\n"

        header = match.group(1)
        existing_body = match.group(2).rstrip()
        existing_body_lower = existing_body.lower()

        existing_lines_set = set(l.strip() for l in existing_body.strip().split('\n') if l.strip())

        filtered_new_lines = []
        for nl in new_lines:
            clean_nl = nl.lower()
            # BUGFIX: the substring duplicate check compared the lowercased
            # candidate against the *mixed-case* body, so it almost never
            # fired; both sides are lower-cased now.
            if clean_nl in existing_body_lower:
                continue

            # Fuzzy redundancy check: skip the candidate when >70% of its
            # significant words (length > 4) already appear in one existing
            # line of the section.
            important_words = set(w for w in clean_nl.split() if len(w) > 4)
            is_redundant = False
            for existing_line in existing_body_lower.split('\n'):
                # BUGFIX: word overlap was previously computed against the
                # mixed-case lines while the candidate words were lower-cased,
                # undercounting matches; both sides are lower-cased now.
                words_in_existing = set(w for w in existing_line.split() if len(w) > 4)
                if important_words and len(important_words & words_in_existing) / len(important_words) > 0.7:
                    is_redundant = True
                    break

            if nl not in existing_lines_set and nl.lower() not in [el.lower() for el in existing_lines_set] and not is_redundant:
                filtered_new_lines.append(nl)
                # Track it so duplicates *within* the same delta are skipped too.
                existing_lines_set.add(nl)

        if not filtered_new_lines:
            return full_text  # Nothing new: leave the knowledge base untouched.

        # Rebuild the section with the accepted lines appended at the end.
        updated_body = existing_body.rstrip() + "\n" + "\n".join(filtered_new_lines)
        return full_text.replace(match.group(0), f"{header}\n{updated_body}\n", 1)
|
agent.py
CHANGED
|
@@ -1,26 +1,8 @@
|
|
| 1 |
"""
|
| 2 |
-
|
| 3 |
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
Your agent should:
|
| 8 |
-
1. Connect to the MCP server via the provided client
|
| 9 |
-
2. Use the ReAct pattern (Thought -> Action -> Observation)
|
| 10 |
-
3. Call MCP tools to interact with the game
|
| 11 |
-
4. Maximize the game score within the step limit
|
| 12 |
-
|
| 13 |
-
Required method:
|
| 14 |
-
async def run(self, client, game, max_steps, seed, verbose) -> RunResult
|
| 15 |
-
|
| 16 |
-
The 'client' is a FastMCP Client already connected to your MCP server.
|
| 17 |
-
Use it to call tools like: await client.call_tool("play_action", {"action": "look"})
|
| 18 |
-
|
| 19 |
-
Tips:
|
| 20 |
-
- Start by looking around and understanding your environment
|
| 21 |
-
- Keep track of visited locations to avoid loops
|
| 22 |
-
- Pick up useful items (lamp, sword, etc.)
|
| 23 |
-
- The seed parameter should be used to set your LLM's seed for reproducibility
|
| 24 |
"""
|
| 25 |
|
| 26 |
import json
|
|
@@ -32,44 +14,43 @@ from typing import Optional
|
|
| 32 |
from dotenv import load_dotenv
|
| 33 |
from huggingface_hub import InferenceClient
|
| 34 |
|
| 35 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
load_dotenv()
|
| 37 |
|
| 38 |
# =============================================================================
|
| 39 |
# LLM Configuration - DO NOT MODIFY
|
| 40 |
# =============================================================================
|
| 41 |
|
| 42 |
-
# Model to use (fixed for fair evaluation)
|
| 43 |
LLM_MODEL = "Qwen/Qwen2.5-72B-Instruct"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
|
| 45 |
-
# Initialize the LLM client (uses HF_TOKEN from environment)
|
| 46 |
_hf_token = os.getenv("HF_TOKEN")
|
| 47 |
if not _hf_token:
|
| 48 |
raise ValueError("HF_TOKEN not found. Set it in your .env file.")
|
| 49 |
|
| 50 |
LLM_CLIENT = InferenceClient(token=_hf_token)
|
| 51 |
|
| 52 |
-
|
| 53 |
def call_llm(prompt: str, system_prompt: str, seed: int, max_tokens: int = 300) -> str:
|
| 54 |
-
"""
|
| 55 |
-
Call the LLM with the given prompt. Use this function in your agent.
|
| 56 |
-
|
| 57 |
-
Args:
|
| 58 |
-
prompt: The user prompt (current game state, history, etc.)
|
| 59 |
-
system_prompt: The system prompt (instructions for the agent)
|
| 60 |
-
seed: Random seed for reproducibility
|
| 61 |
-
max_tokens: Maximum tokens in response (default: 300)
|
| 62 |
-
|
| 63 |
-
Returns:
|
| 64 |
-
The LLM's response text
|
| 65 |
-
|
| 66 |
-
Example:
|
| 67 |
-
response = call_llm(
|
| 68 |
-
prompt="You are in a forest. What do you do?",
|
| 69 |
-
system_prompt=SYSTEM_PROMPT,
|
| 70 |
-
seed=42,
|
| 71 |
-
)
|
| 72 |
-
"""
|
| 73 |
messages = [
|
| 74 |
{"role": "system", "content": system_prompt},
|
| 75 |
{"role": "user", "content": prompt},
|
|
@@ -78,7 +59,7 @@ def call_llm(prompt: str, system_prompt: str, seed: int, max_tokens: int = 300)
|
|
| 78 |
response = LLM_CLIENT.chat.completions.create(
|
| 79 |
model=LLM_MODEL,
|
| 80 |
messages=messages,
|
| 81 |
-
temperature=0.0,
|
| 82 |
max_tokens=max_tokens,
|
| 83 |
seed=seed,
|
| 84 |
)
|
|
@@ -99,179 +80,1923 @@ class RunResult:
|
|
| 99 |
|
| 100 |
|
| 101 |
# =============================================================================
|
| 102 |
-
#
|
| 103 |
# =============================================================================
|
| 104 |
|
| 105 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 106 |
|
| 107 |
-
|
|
|
|
| 108 |
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
- inventory: Check what you're carrying (if implemented)
|
| 113 |
|
| 114 |
-
|
| 115 |
-
- Movement: north, south, east, west, up, down, enter, exit
|
| 116 |
-
- Objects: take <item>, drop <item>, open <thing>, close <thing>, examine <thing>
|
| 117 |
-
- Other: look, inventory, read <thing>, turn on lamp
|
| 118 |
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
ARGS: <JSON arguments, e.g., {"action": "look"}>
|
| 123 |
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
TOOL: play_action
|
| 127 |
-
ARGS: {"action": "look"}
|
| 128 |
-
"""
|
| 129 |
|
|
|
|
|
|
|
|
|
|
| 130 |
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
|
|
|
| 134 |
|
| 135 |
-
class StudentAgent:
|
| 136 |
-
"""
|
| 137 |
-
Your ReAct agent implementation.
|
| 138 |
-
|
| 139 |
-
TODO:
|
| 140 |
-
1. Implement the run() method with the ReAct loop
|
| 141 |
-
2. Parse LLM responses to extract tool calls
|
| 142 |
-
3. Track state and avoid loops
|
| 143 |
-
|
| 144 |
-
Use the provided call_llm() function to interact with the LLM.
|
| 145 |
-
"""
|
| 146 |
-
|
| 147 |
-
def __init__(self):
|
| 148 |
-
"""Initialize your agent here."""
|
| 149 |
-
# TODO: Initialize any state tracking you need
|
| 150 |
-
# self.history = []
|
| 151 |
-
# self.visited_locations = set()
|
| 152 |
-
pass
|
| 153 |
-
|
| 154 |
async def run(
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 164 |
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
verbose:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 171 |
|
| 172 |
-
|
| 173 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 174 |
"""
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
# Basic structure:
|
| 178 |
-
# 1. Get initial observation (call play_action with "look")
|
| 179 |
-
# 2. Loop for max_steps:
|
| 180 |
-
# a. Build prompt with current observation and history
|
| 181 |
-
# b. Call LLM to get thought and action
|
| 182 |
-
# c. Parse the response to extract tool and args
|
| 183 |
-
# d. Call the tool via client.call_tool(tool_name, args)
|
| 184 |
-
# e. Update history and state
|
| 185 |
-
# f. Check for game over
|
| 186 |
-
# 3. Return RunResult with final statistics
|
| 187 |
-
|
| 188 |
-
# Example of calling a tool:
|
| 189 |
-
# result = await client.call_tool("play_action", {"action": "look"})
|
| 190 |
-
# observation = result[0].text if result else "No response"
|
| 191 |
-
|
| 192 |
-
# Example of calling the LLM:
|
| 193 |
-
# response = call_llm(
|
| 194 |
-
# prompt="Current observation: " + observation,
|
| 195 |
-
# system_prompt=SYSTEM_PROMPT,
|
| 196 |
-
# seed=seed,
|
| 197 |
-
# )
|
| 198 |
-
|
| 199 |
-
# Placeholder implementation - replace with your code
|
| 200 |
-
locations_visited = set()
|
| 201 |
-
history = []
|
| 202 |
-
final_score = 0
|
| 203 |
-
moves = 0
|
| 204 |
-
|
| 205 |
-
# TODO: Your implementation here
|
| 206 |
-
# ...
|
| 207 |
-
|
| 208 |
-
return RunResult(
|
| 209 |
-
final_score=final_score,
|
| 210 |
-
max_score=350, # Zork1 max score, adjust if needed
|
| 211 |
-
moves=moves,
|
| 212 |
-
locations_visited=locations_visited,
|
| 213 |
-
game_completed=False,
|
| 214 |
-
history=history,
|
| 215 |
-
)
|
| 216 |
-
|
| 217 |
-
def _build_prompt(self, observation: str, history: list) -> str:
|
| 218 |
"""
|
| 219 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 220 |
|
| 221 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 222 |
"""
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 227 |
"""
|
| 228 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 229 |
|
| 230 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 231 |
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
|
| 237 |
-
|
| 238 |
-
|
| 239 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 240 |
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
Call the LLM with the given prompt.
|
| 244 |
|
| 245 |
-
|
| 246 |
-
|
| 247 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 248 |
|
| 249 |
|
| 250 |
# =============================================================================
|
| 251 |
-
#
|
| 252 |
# =============================================================================
|
| 253 |
|
| 254 |
async def test_agent():
|
| 255 |
"""Test the agent locally."""
|
| 256 |
from fastmcp import Client
|
| 257 |
|
| 258 |
-
# Path to your MCP server
|
| 259 |
-
server_path = "mcp_server.py"
|
| 260 |
-
|
| 261 |
agent = StudentAgent()
|
| 262 |
|
| 263 |
-
async with Client(
|
| 264 |
result = await agent.run(
|
| 265 |
client=client,
|
| 266 |
game="zork1",
|
| 267 |
-
max_steps=
|
| 268 |
seed=42,
|
| 269 |
verbose=True,
|
| 270 |
)
|
| 271 |
|
| 272 |
-
print(f"\
|
|
|
|
| 273 |
print(f"Moves: {result.moves}")
|
| 274 |
-
print(f"Locations: {result.locations_visited}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 275 |
|
| 276 |
|
| 277 |
if __name__ == "__main__":
|
|
|
|
| 1 |
"""
|
| 2 |
+
Example: MCP ReAct Agent
|
| 3 |
|
| 4 |
+
A complete ReAct agent that uses MCP tools to play text adventure games.
|
| 5 |
+
This is a working example students can learn from.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
"""
|
| 7 |
|
| 8 |
import json
|
|
|
|
| 14 |
from dotenv import load_dotenv
|
| 15 |
from huggingface_hub import InferenceClient
|
| 16 |
|
| 17 |
+
from typing import List, Optional, Dict
|
| 18 |
+
from pydantic import BaseModel
|
| 19 |
+
import json
|
| 20 |
+
|
| 21 |
+
from adaptive_knowledge import AdaptiveKnowledgeManager
|
| 22 |
+
from spatial_memory import SpatialMemorySystem
|
| 23 |
+
from memory import HierarchicalMemoryManager
|
| 24 |
+
from prompts import (
|
| 25 |
+
SYSTEM_PROMPT,
|
| 26 |
+
PLANNER_SYSTEM_PROMPT,
|
| 27 |
+
EXTRACTOR_SYSTEM_PROMPT,
|
| 28 |
+
CRITIC_SYSTEM_PROMPT,
|
| 29 |
+
MEMORY_SYNTHESIS_PROMPT
|
| 30 |
+
)
|
| 31 |
+
|
| 32 |
load_dotenv()
|
| 33 |
|
| 34 |
# =============================================================================
|
| 35 |
# LLM Configuration - DO NOT MODIFY
|
| 36 |
# =============================================================================
|
| 37 |
|
|
|
|
| 38 |
LLM_MODEL = "Qwen/Qwen2.5-72B-Instruct"
|
| 39 |
+
# LLM_MODEL = "Qwen/Qwen2.5-72B-Instruct:featherless-ai"
|
| 40 |
+
# LLM_MODEL = "Qwen/Qwen2.5-72B-Instruct::fireworks-ai"
|
| 41 |
+
# LLM_MODEL = "Qwen/Qwen2.5-1.5B-Instruct"
|
| 42 |
+
# LLM_MODEL = "Qwen/Qwen2.5-7B-Instruct"
|
| 43 |
+
# LLM_MODEL = "meta-llama/Llama-3.2-3B-Instruct"
|
| 44 |
+
# LLM_MODEL = "mistralai/Mistral-7B-Instruct-v0.3"
|
| 45 |
|
|
|
|
| 46 |
_hf_token = os.getenv("HF_TOKEN")
|
| 47 |
if not _hf_token:
|
| 48 |
raise ValueError("HF_TOKEN not found. Set it in your .env file.")
|
| 49 |
|
| 50 |
LLM_CLIENT = InferenceClient(token=_hf_token)
|
| 51 |
|
|
|
|
| 52 |
def call_llm(prompt: str, system_prompt: str, seed: int, max_tokens: int = 300) -> str:
|
| 53 |
+
"""Call the LLM with the given prompt."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
messages = [
|
| 55 |
{"role": "system", "content": system_prompt},
|
| 56 |
{"role": "user", "content": prompt},
|
|
|
|
| 59 |
response = LLM_CLIENT.chat.completions.create(
|
| 60 |
model=LLM_MODEL,
|
| 61 |
messages=messages,
|
| 62 |
+
temperature=0.0,
|
| 63 |
max_tokens=max_tokens,
|
| 64 |
seed=seed,
|
| 65 |
)
|
|
|
|
| 80 |
|
| 81 |
|
| 82 |
# =============================================================================
|
| 83 |
+
# Student Agent Implementation
|
| 84 |
# =============================================================================
|
| 85 |
|
| 86 |
+
class StudentAgent:
|
| 87 |
+
"""
|
| 88 |
+
MCP ReAct Agent - A complete working example.
|
| 89 |
+
|
| 90 |
+
This agent demonstrates:
|
| 91 |
+
- ReAct loop (Thought -> Tool -> Observation)
|
| 92 |
+
- Loop detection
|
| 93 |
+
- Action validation
|
| 94 |
+
- Score tracking via memory tool
|
| 95 |
+
"""
|
| 96 |
+
|
| 97 |
+
def __init__(self):
    """Initialize the agent's per-session state and sub-modules.

    Wires together the strategy, critic, knowledge, mapping, extraction and
    memory components used by run().  No I/O happens here beyond whatever the
    sub-module constructors do themselves.
    """
    # Full turn-by-turn record of the session (dicts of action/observation).
    self.history: list[dict] = []
    # Sliding window of the most recent actions, used for loop detection.
    self.recent_actions: list[str] = []

    # Last known game score.
    self.score: int = 0
    self.strategist = StrategyModule()

    # Critic LLM wrapper used to vet proposed actions.
    self.critic = CriticAgent(call_llm_func=call_llm, verbose=True)
    # Persistent cross-session strategic knowledge, stored as markdown.
    self.knowledge_manager = AdaptiveKnowledgeManager("knowledgebase.md")
    self.world_mapper = WorldMapper()

    # Turns raw observations + RAM data into structured location facts.
    self.extractor = ObservationExtractor(call_llm)

    # Run the knowledge-learning pass every N steps.
    self.learning_interval = 10
    self.last_learning_step = 0
    # Cache of observations already seen, to detect repeated game responses.
    self.seen_responses = {}

    self.last_room_id = None  # Use this one for the room-ID change logic.
    self.last_world_hash = None

    self.memory_manager = HierarchicalMemoryManager(call_llm)
    # Human-readable name of the current location ("Start" until first move).
    self.current_location = "Start"
    # Per-location record of actions already attempted there.
    self.location_action_memory = {}

    # Number of consecutive steps spent in the current room.
    self.steps_in_current_room = 0

    # Actions tried since entering the current room.
    self.current_room_actions = set()
+
|
| 126 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 127 |
async def run(
|
| 128 |
+
self,
|
| 129 |
+
client,
|
| 130 |
+
game: str,
|
| 131 |
+
max_steps: int,
|
| 132 |
+
seed: int,
|
| 133 |
+
verbose: bool = False,
|
| 134 |
+
) -> RunResult:
|
| 135 |
+
"""Run the agentknowledge_content = self.knowledge_manager.load_knowledge()
|
| 136 |
+
if verbose and knowledge_content:
|
| 137 |
+
print(f"\n[INIT] Loaded strategic knowledge ({len(knowledge_content)} chars)")
|
| 138 |
+
for a game session."""
|
| 139 |
+
locations_visited = set()
|
| 140 |
+
history = []
|
| 141 |
+
moves = 0
|
| 142 |
+
|
| 143 |
+
# Charger la connaissance au démarrage
|
| 144 |
+
knowledge_content = self.knowledge_manager.load_knowledge()
|
| 145 |
+
if verbose and knowledge_content:
|
| 146 |
+
print(f"\n[INIT] Loaded strategic knowledge ({len(knowledge_content)} chars)")
|
| 147 |
+
|
| 148 |
+
# Get list of available tools
|
| 149 |
+
tools = await client.list_tools()
|
| 150 |
+
tool_names = [t.name for t in tools]
|
| 151 |
+
|
| 152 |
+
# Get initial observation
|
| 153 |
+
result = await client.call_tool("play_action", {"action": "look"})
|
| 154 |
+
observation = self._extract_result(result)
|
| 155 |
+
|
| 156 |
+
loc_result = await client.call_tool("get_location_info", {})
|
| 157 |
+
raw_res = self._extract_result(loc_result)
|
| 158 |
+
|
| 159 |
+
if isinstance(raw_res, str):
|
| 160 |
+
import json
|
| 161 |
+
loc_dict = json.loads(raw_res)
|
| 162 |
+
else:
|
| 163 |
+
loc_dict = raw_res
|
| 164 |
+
|
| 165 |
+
structured_data = self.extractor.extract(
|
| 166 |
+
raw_text=observation,
|
| 167 |
+
seed=seed + 0,
|
| 168 |
+
ram_data=loc_dict,
|
| 169 |
+
last_location=self.current_location
|
| 170 |
+
)
|
| 171 |
+
|
| 172 |
+
self.world_mapper.update_map(structured_data, "look", observation)
|
| 173 |
|
| 174 |
+
|
| 175 |
+
# Track initial location
|
| 176 |
+
# location = observation.split("\n")[0] if observation else "Unknown"
|
| 177 |
+
# locations_visited.add(location)
|
| 178 |
+
|
| 179 |
+
if verbose:
|
| 180 |
+
print(f"\n{observation}")
|
| 181 |
+
|
| 182 |
+
# Main ReAct loop
|
| 183 |
+
for step in range(1, max_steps + 1):
|
| 184 |
+
|
| 185 |
+
raw_possible_actions = "No actions available"
|
| 186 |
+
structured_data = {"location_name": self.current_location, "is_new_location": False}
|
| 187 |
+
priority_guidance = ""
|
| 188 |
+
|
| 189 |
+
try:
|
| 190 |
+
inv_result = await client.call_tool("inventory", {})
|
| 191 |
+
current_inv = self._extract_result(inv_result)
|
| 192 |
+
|
| 193 |
+
loc_result = await client.call_tool("get_location_info", {})
|
| 194 |
+
raw_res = self._extract_result(loc_result)
|
| 195 |
+
|
| 196 |
+
if isinstance(raw_res, str):
|
| 197 |
+
import json
|
| 198 |
+
loc_dict = json.loads(raw_res)
|
| 199 |
+
else:
|
| 200 |
+
loc_dict = raw_res
|
| 201 |
+
|
| 202 |
+
if loc_dict.get("status") == "success":
|
| 203 |
+
current_loc_id = loc_dict["location"].get("id")
|
| 204 |
+
current_loc_name = loc_dict["location"].get("name")
|
| 205 |
+
current_world_hash = loc_dict.get("world_hash")
|
| 206 |
+
|
| 207 |
+
objects_in_room = [obj["name"] for obj in loc_dict.get("detected_objects", [])]
|
| 208 |
+
else:
|
| 209 |
+
print("⚠️ Erreur lors de la récupération des données RAM")
|
| 210 |
+
|
| 211 |
+
is_new_room = False
|
| 212 |
+
if current_loc_id != self.last_room_id:
|
| 213 |
+
is_new_room = True
|
| 214 |
+
self.steps_in_current_room = 0
|
| 215 |
+
print(f"🚀 Mouvement détecté vers : {current_loc_name} (ID: {current_loc_id})")
|
| 216 |
+
self.last_room_id = current_loc_id
|
| 217 |
+
self.current_location = current_loc_name
|
| 218 |
+
else :
|
| 219 |
+
self.steps_in_current_room += 1
|
| 220 |
+
|
| 221 |
+
world_changed = False
|
| 222 |
+
if current_world_hash != self.last_world_hash:
|
| 223 |
+
world_changed = True
|
| 224 |
+
print(f"🔍 Le monde a changé (objet déplacé/modifié)")
|
| 225 |
+
self.last_world_hash = current_world_hash
|
| 226 |
+
|
| 227 |
+
|
| 228 |
+
map_result = await client.call_tool("get_map", {})
|
| 229 |
+
current_map = self._extract_result(map_result)
|
| 230 |
+
|
| 231 |
+
# Extraction structurée
|
| 232 |
+
structured_data = self.extractor.extract(
|
| 233 |
+
raw_text=observation,
|
| 234 |
+
seed=seed + step,
|
| 235 |
+
ram_data=loc_dict,
|
| 236 |
+
last_location=self.current_location
|
| 237 |
+
)
|
| 238 |
+
|
| 239 |
+
print("structured_data run loop ", structured_data)
|
| 240 |
+
|
| 241 |
+
structured_data["location_id"] = current_loc_id
|
| 242 |
+
structured_data["location_name"] = current_loc_name
|
| 243 |
+
structured_data["is_new_location"] = is_new_room
|
| 244 |
+
structured_data["world_changed"] = world_changed
|
| 245 |
+
|
| 246 |
+
should_refresh_actions = is_new_room or world_changed
|
| 247 |
+
|
| 248 |
+
if should_refresh_actions:
|
| 249 |
+
cheat_result = await client.call_tool("get_valid_actions_cheat", {})
|
| 250 |
+
new_actions = self._extract_result(cheat_result)
|
| 251 |
+
|
| 252 |
+
if is_new_room:
|
| 253 |
+
self.current_room_actions = set()
|
| 254 |
+
|
| 255 |
+
if isinstance(new_actions, list):
|
| 256 |
+
self.current_room_actions.update(new_actions)
|
| 257 |
+
elif isinstance(new_actions, str):
|
| 258 |
+
self.current_room_actions.update([a.strip() for a in new_actions.split(',')])
|
| 259 |
+
|
| 260 |
+
structured_data["cheat_actions"] = list(self.current_room_actions)
|
| 261 |
+
|
| 262 |
+
# valid_actions_result = await client.call_tool("get_valid_actions_cheat", {})
|
| 263 |
+
# raw_possible_actions = self._extract_result(valid_actions_result)
|
| 264 |
+
priority_guidance = self._build_priority_guidance(structured_data)
|
| 265 |
+
print("priority guidance",priority_guidance)
|
| 266 |
+
print("fin PRIORITY")
|
| 267 |
+
|
| 268 |
+
except Exception as e:
|
| 269 |
+
print(f"⚠️ Sensory Error: {e}")
|
| 270 |
+
structured_data = {}
|
| 271 |
+
current_inv = "Unknown"
|
| 272 |
+
current_map = "Unknown"
|
| 273 |
+
|
| 274 |
+
is_urgent = False
|
| 275 |
+
|
| 276 |
+
enriched_actions = self._generate_enriched_actions(structured_data)
|
| 277 |
+
|
| 278 |
+
# strategic_knowledge = self.knowledge_manager.get_strategic_knowledge()
|
| 279 |
+
|
| 280 |
+
# memory_context = self.memory_manager.get_context(self.current_location)
|
| 281 |
+
|
| 282 |
+
# visible_exits = structured_data.get("visible_exits", [])
|
| 283 |
+
# visible_objs = structured_data.get("visible_objects", [])
|
| 284 |
+
# visible_entities = structured_data.get("visible_entities", [])
|
| 285 |
+
# is_urgent = structured_data.get("in_combat_or_chase", False)
|
| 286 |
+
# summary = structured_data.get("description_summary", "")
|
| 287 |
+
|
| 288 |
+
|
| 289 |
+
# if hasattr(self, 'memory_manager'):
|
| 290 |
+
# memory_context = self.memory_manager.get_context(current_loc)
|
| 291 |
+
# else:
|
| 292 |
+
# memory_context = ""
|
| 293 |
+
|
| 294 |
+
# ============================================================
|
| 295 |
+
# 1. ADAPTIVE KNOWLEDGE CYCLE
|
| 296 |
+
# ============================================================
|
| 297 |
+
# On vérifie si on doit apprendre (intervalle ou mort détectée dans l'obs précédente)
|
| 298 |
+
is_dead = self._is_game_over(observation)
|
| 299 |
+
time_to_learn = (step - self.last_learning_step >= self.learning_interval)
|
| 300 |
+
|
| 301 |
+
# if (time_to_learn or is_dead) and len(self.history) > 0:
|
| 302 |
+
# if verbose: print(f"\n[KNOWLEDGE] analyzing recent turns to extract wisdom...")
|
| 303 |
+
# updated = self.knowledge_manager.update_knowledge(
|
| 304 |
+
# history=self.history,
|
| 305 |
+
# start_idx=self.last_learning_step,
|
| 306 |
+
# end_idx=len(self.history),
|
| 307 |
+
# llm_function=lambda prompt, system_prompt, seed: call_llm(
|
| 308 |
+
# prompt, # Premier arg
|
| 309 |
+
# system_prompt, # Deuxième arg
|
| 310 |
+
# seed # Troisième arg
|
| 311 |
+
# )
|
| 312 |
+
# )
|
| 313 |
+
|
| 314 |
+
# if updated:
|
| 315 |
+
# knowledge_content = self.knowledge_manager.load_knowledge()
|
| 316 |
+
# if verbose: print("[KNOWLEDGE] Strategic Knowledge Base updated!")
|
| 317 |
+
|
| 318 |
+
# self.last_learning_step = len(self.history)
|
| 319 |
+
|
| 320 |
+
# id_result = await client.call_tool("get_location_id", {})
|
| 321 |
+
# try:
|
| 322 |
+
# room_id = int(self._extract_result(id_result))
|
| 323 |
+
# except:
|
| 324 |
+
# room_id = -1 # Fallback
|
| 325 |
+
|
| 326 |
+
# room_name = structured_data.get("location_name", "Unknown Area")
|
| 327 |
+
|
| 328 |
+
# if step > 1:
|
| 329 |
+
# self.map.update(room_id, room_name, self.last_move_action)
|
| 330 |
+
|
| 331 |
+
# spatial_context = self.map.get_context_for_llm(room_id)
|
| 332 |
+
|
| 333 |
+
# Build context based on urgency
|
| 334 |
+
if is_urgent:
|
| 335 |
+
if verbose:
|
| 336 |
+
print("\n🔥 [MODE URGENCE ACTIVÉ] Le cochon court / Combat en cours !")
|
| 337 |
+
|
| 338 |
+
# STRATEGIC KNOWLEDGE: {strategic_knowledge if strategic_knowledge else "No knowledge yet."}
|
| 339 |
+
rich_context = f"""
|
| 340 |
+
!!! URGENT SITUATION - FOCUS ON IMMEDIATE ACTION !!!
|
| 341 |
+
SITUATION : {observation}
|
| 342 |
+
PRIORITY GUIDANCE : {priority_guidance if priority_guidance else "- [STATUS] Standard exploration."}
|
| 343 |
+
POSSIBLE ACTIONS: {enriched_actions}
|
| 344 |
+
"""
|
| 345 |
+
|
| 346 |
+
# rich_context = f"""
|
| 347 |
+
# !!! URGENT SITUATION - FOCUS ON IMMEDIATE ACTION !!!
|
| 348 |
+
# TARGETS/ENEMIES: {visible_entities}
|
| 349 |
+
# LAST EVENT: {summary}
|
| 350 |
+
# INVENTORY: {current_inv}
|
| 351 |
+
# KNOWLEDGE: {knowledge_content}
|
| 352 |
+
# SPATIAL : {spatial_context}
|
| 353 |
+
# RELEVANT MEMORIES: {memory_context}
|
| 354 |
+
# """
|
| 355 |
+
else:
|
| 356 |
+
# STRATEGIC KNOWLEDGE: {strategic_knowledge if strategic_knowledge else "No knowledge yet."}
|
| 357 |
+
# LOCAL MEMORY : {self.current_location} {memory_context}
|
| 358 |
+
rich_context = f"""
|
| 359 |
+
CURRENT SITUATION : {observation}
|
| 360 |
+
INVENTORY : {current_inv}
|
| 361 |
+
PRIORITY GUIDANCE : {priority_guidance if priority_guidance else "- [STATUS] Standard exploration."}
|
| 362 |
+
POSSIBLE ACTIONS: {enriched_actions}
|
| 363 |
+
"""
|
| 364 |
+
# rich_context = f"""
|
| 365 |
+
# LOCATION: {current_loc}
|
| 366 |
+
# ENTITIES: {visible_entities}
|
| 367 |
+
# OBJECTS: {visible_objs}
|
| 368 |
+
# EXITS: {visible_exits}
|
| 369 |
+
# INVENTORY: {current_inv}
|
| 370 |
+
# KNOWN MAP: {current_map}
|
| 371 |
+
# SUMMARY: {summary}
|
| 372 |
+
# KNOWLEDGE: {knowledge_content}
|
| 373 |
+
# SPATIAL : {spatial_context}
|
| 374 |
+
# RELEVANT MEMORIES : {memory_context}
|
| 375 |
+
# """
|
| 376 |
+
if verbose :
|
| 377 |
+
print(f"Context {rich_context}")
|
| 378 |
+
|
| 379 |
+
# # --- 2. STRATEGY (Planner) ---
|
| 380 |
+
# if step == 1 or step % 5 == 0:
|
| 381 |
+
# if verbose:
|
| 382 |
+
# print(f"\n[STRATEGY] Thinking about long-term plan...")
|
| 383 |
+
|
| 384 |
+
# plan = self.strategist.generate_plan(rich_context, self.history, step,knowledge_content)
|
| 385 |
+
|
| 386 |
+
# if verbose and plan:
|
| 387 |
+
# print(f"[STRATEGY] Objective: {plan.get('current_objective')}")
|
| 388 |
+
|
| 389 |
+
# # --- 3. GENERATION & CRITIC LOOP (Actor) ---
|
| 390 |
+
prompt = self._build_prompt(rich_context,knowledge="")
|
| 391 |
+
|
| 392 |
+
max_retries = 3
|
| 393 |
+
# Default values in case something goes wrong
|
| 394 |
+
final_tool_name = "play_action"
|
| 395 |
+
final_tool_args = {"action": "look"}
|
| 396 |
+
final_thought = "No thought generated"
|
| 397 |
+
|
| 398 |
+
for attempt in range(max_retries):
|
| 399 |
+
# Call LLM
|
| 400 |
+
response = call_llm(prompt, SYSTEM_PROMPT, seed + step + attempt)
|
| 401 |
+
|
| 402 |
+
# Parse response
|
| 403 |
+
thought, tool_name, tool_args = self._parse_response(response, tool_names)
|
| 404 |
+
final_thought = thought # Keep track of the thought
|
| 405 |
+
|
| 406 |
+
final_tool_name = tool_name
|
| 407 |
+
final_tool_args = tool_args
|
| 408 |
+
|
| 409 |
+
# Validate basics
|
| 410 |
+
tool_name, tool_args = self._validate_tool_call(tool_name, tool_args, tool_names)
|
| 411 |
+
|
| 412 |
+
# Logic Validation (Critic)
|
| 413 |
+
# if tool_name == "play_action":
|
| 414 |
+
# proposed_action = tool_args.get("action", "look")
|
| 415 |
+
|
| 416 |
+
# is_allowed = self.critic.critique_action(
|
| 417 |
+
# proposed_action,
|
| 418 |
+
# rich_context,
|
| 419 |
+
# current_inv,
|
| 420 |
+
# current_loc,
|
| 421 |
+
# seed + step,
|
| 422 |
+
# valid_exits=visible_exits
|
| 423 |
+
# )
|
| 424 |
+
|
| 425 |
+
# if is_allowed:
|
| 426 |
+
# final_tool_name = tool_name
|
| 427 |
+
# final_tool_args = tool_args
|
| 428 |
+
# break # Success!
|
| 429 |
+
# else:
|
| 430 |
+
# if verbose:
|
| 431 |
+
# print(f"⚠️ Action '{proposed_action}' blocked by Critic. Retrying ({attempt+1}/{max_retries})...")
|
| 432 |
+
|
| 433 |
+
# # Add feedback to prompt for next attempt
|
| 434 |
+
# prompt += f"\n\nUSER: The action '{proposed_action}' is invalid, impossible (check exits), or repetitive. Please propose a DIFFERENT action."
|
| 435 |
+
|
| 436 |
+
# # If this was the last attempt, force fallback
|
| 437 |
+
# if attempt == max_retries - 1:
|
| 438 |
+
# if verbose:
|
| 439 |
+
# print("❌ Too many rejections. Forcing 'look'.")
|
| 440 |
+
# final_tool_name = "play_action"
|
| 441 |
+
# final_tool_args = {"action": "look"}
|
| 442 |
+
# else:
|
| 443 |
+
# # Non-game actions (map, memory) are always allowed
|
| 444 |
+
# final_tool_name = tool_name
|
| 445 |
+
# final_tool_args = tool_args
|
| 446 |
+
# break
|
| 447 |
+
|
| 448 |
+
# --- 4. EXECUTION PREPARATION ---
|
| 449 |
+
tool_name = final_tool_name
|
| 450 |
+
tool_args = final_tool_args
|
| 451 |
+
thought = final_thought # Ensure we have the thought for history
|
| 452 |
+
|
| 453 |
+
if verbose:
|
| 454 |
+
print(f"\n--- Step {step} ---")
|
| 455 |
+
print(f"[THOUGHT] {thought}")
|
| 456 |
+
print(f"[TOOL] {tool_name}({tool_args})")
|
| 457 |
+
|
| 458 |
+
# Handle history and loop detection for ACTIONS only
|
| 459 |
+
proposed_action = "tool_use"
|
| 460 |
+
if tool_name == "play_action":
|
| 461 |
+
proposed_action = tool_args.get("action", "look")
|
| 462 |
+
|
| 463 |
+
self.recent_actions.append(proposed_action)
|
| 464 |
+
if len(self.recent_actions) > 5:
|
| 465 |
+
self.recent_actions = self.recent_actions[-5:]
|
| 466 |
+
|
| 467 |
+
if len(self.recent_actions) >= 3 and len(set(self.recent_actions[-3:])) == 1:
|
| 468 |
+
# On cherche les directions dans la liste possible_actions
|
| 469 |
+
import random
|
| 470 |
+
|
| 471 |
+
all_raw_actions = []
|
| 472 |
+
if isinstance(enriched_actions, str):
|
| 473 |
+
all_raw_actions = [line.strip("- ").strip() for line in enriched_actions.split('\n') if "-" in line]
|
| 474 |
+
|
| 475 |
+
move_keywords = ["north", "south", "east", "west", "ne", "nw", "se", "sw", "up", "down", "in", "out", "go "]
|
| 476 |
+
valid_moves = [
|
| 477 |
+
a for a in all_raw_actions
|
| 478 |
+
if any(k == a or a.startswith("go ") for k in move_keywords) or a == "wait"
|
| 479 |
+
]
|
| 480 |
+
|
| 481 |
+
if valid_moves:
|
| 482 |
+
# On choisit un mouvement au hasard PARMI ceux qui ne sont pas l'action répétée
|
| 483 |
+
last_action = self.recent_actions[-1]
|
| 484 |
+
choices = [m for m in valid_moves if m != last_action]
|
| 485 |
+
forced_move = random.choice(choices if choices else valid_moves)
|
| 486 |
+
|
| 487 |
+
tool_args = {"action": forced_move}
|
| 488 |
+
proposed_action = forced_move
|
| 489 |
+
|
| 490 |
+
if verbose:
|
| 491 |
+
print(f"🔄 [LOOP BREAK] Agent stuck on '{last_action}'. Forcing move to: {forced_move}")
|
| 492 |
+
else:
|
| 493 |
+
# Fallback ultime si aucun mouvement n'est détecté dans les actions possibles
|
| 494 |
+
tool_args = {"action": "wait"}
|
| 495 |
+
proposed_action = "wait"
|
| 496 |
+
|
| 497 |
+
moves += 1
|
| 498 |
+
|
| 499 |
+
|
| 500 |
+
# --- 5. EXECUTION ---
|
| 501 |
+
try:
|
| 502 |
+
result = await client.call_tool(tool_name, tool_args)
|
| 503 |
+
new_observation = self._extract_result(result)
|
| 504 |
+
|
| 505 |
+
|
| 506 |
+
full_action_key = tool_args.get("action", tool_name)
|
| 507 |
+
|
| 508 |
+
if self.current_location not in self.location_action_memory:
|
| 509 |
+
self.location_action_memory[self.current_location] = []
|
| 510 |
+
|
| 511 |
+
if tool_name :
|
| 512 |
+
summary = self._clean_memory_result(new_observation)
|
| 513 |
+
# On stocke un petit dictionnaire par action pour garder le contexte
|
| 514 |
+
action_entry = {
|
| 515 |
+
"action": full_action_key,
|
| 516 |
+
"result": summary,
|
| 517 |
+
"step": getattr(self, 'step_count', 0) # Optionnel: pour savoir quand c'est arrivé
|
| 518 |
+
}
|
| 519 |
+
self.location_action_memory[self.current_location].append(action_entry)
|
| 520 |
+
|
| 521 |
+
if tool_name == "play_action":
|
| 522 |
+
loc_res = await client.call_tool("get_location_info", {})
|
| 523 |
+
new_loc_dict = self._extract_result(loc_res)
|
| 524 |
+
|
| 525 |
+
print("new loc dict",new_loc_dict)
|
| 526 |
+
|
| 527 |
+
if isinstance(new_loc_dict, str):
|
| 528 |
+
import json
|
| 529 |
+
try:
|
| 530 |
+
new_loc_dict = json.loads(new_loc_dict)
|
| 531 |
+
except:
|
| 532 |
+
new_loc_dict = {}
|
| 533 |
+
else:
|
| 534 |
+
new_loc_dict = new_loc_dict
|
| 535 |
+
|
| 536 |
+
print("new loc dict",new_loc_dict)
|
| 537 |
+
new_id = str(new_loc_dict["location"].get("id"))
|
| 538 |
+
|
| 539 |
+
action_text = tool_args.get("action", "")
|
| 540 |
+
direction_attempted = self.world_mapper._extract_direction(action_text)
|
| 541 |
+
|
| 542 |
+
if direction_attempted and new_id == str(self.last_room_id):
|
| 543 |
+
reason = self._clean_memory_result(new_observation)
|
| 544 |
+
self.world_mapper.mark_blocked_exit(new_id, direction_attempted, reason)
|
| 545 |
+
print(f"🚫 BLOCAGE : {direction_attempted} n'a pas fonctionné.")
|
| 546 |
+
else:
|
| 547 |
+
print("direction attempted",direction_attempted,action_text)
|
| 548 |
+
new_structured = self.extractor.extract(
|
| 549 |
+
raw_text=new_observation,
|
| 550 |
+
seed=seed,
|
| 551 |
+
ram_data=new_loc_dict,
|
| 552 |
+
last_location=self.current_location
|
| 553 |
+
)
|
| 554 |
+
self.world_mapper.update_map(new_structured, action_text, new_observation)
|
| 555 |
+
|
| 556 |
+
|
| 557 |
+
# Update critic memory
|
| 558 |
+
# if tool_name == "play_action":
|
| 559 |
+
# self.critic.record_result(proposed_action, current_loc, new_observation)
|
| 560 |
+
|
| 561 |
+
# if hasattr(self, 'memory_manager'):
|
| 562 |
+
# try:
|
| 563 |
+
# self.memory_manager.synthesize(current_loc, proposed_action, new_observation, step)
|
| 564 |
+
# except Exception as mem_e:
|
| 565 |
+
# print(f"⚠️ Memory Synthesis Failed: {mem_e}") # On log mais on continue
|
| 566 |
+
|
| 567 |
+
observation = new_observation
|
| 568 |
+
|
| 569 |
+
if verbose:
|
| 570 |
+
print(f"[RESULT] {observation}...")
|
| 571 |
+
|
| 572 |
+
except Exception as e:
|
| 573 |
+
observation = f"Error: {e}"
|
| 574 |
+
if verbose:
|
| 575 |
+
print(f"[ERROR] {e}")
|
| 576 |
+
|
| 577 |
+
# --- 6. TRACKING ---
|
| 578 |
+
# location = observation.split("\n")[0] if observation else "Unknown"
|
| 579 |
+
# locations_visited.add(location)
|
| 580 |
+
|
| 581 |
+
self.history.append({
|
| 582 |
+
"step": step,
|
| 583 |
+
"thought": thought,
|
| 584 |
+
"tool": tool_name,
|
| 585 |
+
"args": tool_args,
|
| 586 |
+
"result": observation[:200]
|
| 587 |
+
})
|
| 588 |
+
if len(self.history) > 10:
|
| 589 |
+
self.history = self.history[-10:]
|
| 590 |
+
|
| 591 |
+
self._update_score(observation)
|
| 592 |
+
|
| 593 |
+
# Add to full run history
|
| 594 |
+
history.append((thought, f"{tool_name}({tool_args})", observation[:100]))
|
| 595 |
+
|
| 596 |
+
if self._is_game_over(observation):
|
| 597 |
+
if verbose:
|
| 598 |
+
print("\n*** GAME OVER ***")
|
| 599 |
+
|
| 600 |
+
# self.knowledge_manager.update_knowledge(
|
| 601 |
+
# self.history, self.last_learning_step, len(self.history),
|
| 602 |
+
# lambda p, s, seed=seed: call_llm(p, s, seed, max_tokens=2000)
|
| 603 |
+
# )
|
| 604 |
+
break
|
| 605 |
|
| 606 |
+
return RunResult(
|
| 607 |
+
final_score=self.score,
|
| 608 |
+
max_score=350,
|
| 609 |
+
moves=moves,
|
| 610 |
+
locations_visited=locations_visited,
|
| 611 |
+
game_completed=self._is_game_over(observation),
|
| 612 |
+
history=history,
|
| 613 |
+
)
|
| 614 |
+
|
| 615 |
+
|
| 616 |
+
def _clean_memory_result(self, text: str) -> str:
|
| 617 |
+
"""Nettoie le résultat pour la mémoire : une seule ligne, max 80 chars."""
|
| 618 |
+
if not text: return ""
|
| 619 |
+
# 1. Remplace les sauts de ligne par des espaces
|
| 620 |
+
clean = text.replace('\n', ' ').strip()
|
| 621 |
+
# 2. Supprime les doubles espaces
|
| 622 |
+
clean = " ".join(clean.split())
|
| 623 |
+
return clean
|
| 624 |
+
|
| 625 |
+
# def _generate_enriched_actions(self, raw_actions: str, structured_data: dict) -> str:
|
| 626 |
+
# enriched = [raw_actions]
|
| 627 |
+
|
| 628 |
+
# # 1. Objets à prendre
|
| 629 |
+
# objs = structured_data.get("takeable_objects", [])
|
| 630 |
+
# if objs:
|
| 631 |
+
# enriched.append("\nOBJECTS TO TAKE:")
|
| 632 |
+
# for obj in objs:
|
| 633 |
+
# enriched.append(f" - take {obj}")
|
| 634 |
+
# enriched.append(f" - examine {obj}")
|
| 635 |
+
|
| 636 |
+
# # 2. Éléments du décor (Vital pour éviter les "examine" dans le vide)
|
| 637 |
+
# features = structured_data.get("interactable_features", [])
|
| 638 |
+
# if features:
|
| 639 |
+
# enriched.append("\nENVIRONMENTAL FEATURES (MUST EXAMINE):")
|
| 640 |
+
# for feat in features:
|
| 641 |
+
# enriched.append(f" - examine {feat}")
|
| 642 |
+
|
| 643 |
+
# # 3. Entités (Sans Grunk)
|
| 644 |
+
# ents = [e for e in structured_data.get("entities", []) if e.lower() != "grunk"]
|
| 645 |
+
# if ents:
|
| 646 |
+
# enriched.append("\nENTITIES (INTERACT):")
|
| 647 |
+
# for ent in ents:
|
| 648 |
+
# enriched.append(f" - talk to {ent}")
|
| 649 |
+
|
| 650 |
+
# return "\n".join(enriched)
|
| 651 |
+
|
| 652 |
+
def _generate_enriched_actions(self, structured_data: dict) -> str:
|
| 653 |
"""
|
| 654 |
+
Génère un kit d'action utilisant les noms traduits (friendly)
|
| 655 |
+
ou signalant les noms techniques si nécessaire.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 656 |
"""
|
| 657 |
+
ram_objects = structured_data.get("raw_ram_objects", [])
|
| 658 |
+
mapping = structured_data.get("name_translation", {})
|
| 659 |
+
inventory_raw = structured_data.get("inventory", [])
|
| 660 |
+
inventory = [obj["name"] for obj in inventory_raw]
|
| 661 |
+
|
| 662 |
+
kit = ["### 🛠️ ACTION CONSTRUCTION KIT"]
|
| 663 |
+
|
| 664 |
+
cheat_actions = structured_data.get("cheat_actions", [])
|
| 665 |
+
if cheat_actions:
|
| 666 |
+
# On met ces actions en avant car elles sont GARANTIES valides par le moteur
|
| 667 |
+
kit.append(f"**DIRECT ACTIONS (Proven Valid)**: {', '.join(cheat_actions)}")
|
| 668 |
+
kit.append(f"ABOVE ACTIONS ARE VALID BUT CAN BE DUMB")
|
| 669 |
+
|
| 670 |
+
# 1. VERBES PERMANENTS
|
| 671 |
+
verbs = ["examine", "take", "drop", "look", "inventory", "wait", "listen", "search"]
|
| 672 |
+
|
| 673 |
+
# 2. VERBES CONTEXTUELS
|
| 674 |
+
# On scanne le texte et la RAM pour adapter les verbes
|
| 675 |
+
all_context = (structured_data.get("description_summary", "") + " ".join(ram_objects)).lower()
|
| 676 |
|
| 677 |
+
if any(x in all_context for x in ["door", "gate", "mailbox", "chest", "box", "case", "window"]):
|
| 678 |
+
verbs += ["open", "close", "unlock", "lock"]
|
| 679 |
+
|
| 680 |
+
if any(x in all_context or "torch" in str(inventory).lower() for x in ["lamp", "torch", "switch", "device"]):
|
| 681 |
+
verbs += ["turn on", "turn off", "light", "extinguish"]
|
| 682 |
+
|
| 683 |
+
kit.append(f"**CORE VERBS**: {', '.join(sorted(set(verbs)))}")
|
| 684 |
+
|
| 685 |
+
# 3. OBJETS (Traitement Friendly vs Technical)
|
| 686 |
+
visible_ram = [o for o in ram_objects if "(missing" not in o.lower()]
|
| 687 |
+
hidden_ram = [o for o in ram_objects if "(missing" in o.lower()]
|
| 688 |
+
|
| 689 |
+
if visible_ram:
|
| 690 |
+
formatted_objects = []
|
| 691 |
+
for tech_name in visible_ram:
|
| 692 |
+
# Si on a une traduction dans le mapping, on l'utilise
|
| 693 |
+
if tech_name in mapping:
|
| 694 |
+
friendly_name = mapping[tech_name]
|
| 695 |
+
formatted_objects.append(f"{friendly_name.upper()}")
|
| 696 |
+
else:
|
| 697 |
+
# Sinon, on affiche le nom technique avec un avertissement
|
| 698 |
+
formatted_objects.append(f"{tech_name} [! technical name] map to possible real and simple words")
|
| 699 |
+
|
| 700 |
+
kit.append(f"**OBJECTS AROUND YOU**: {', '.join(formatted_objects)}")
|
| 701 |
+
kit.append("_Note: Use names exactly as shown above._")
|
| 702 |
+
|
| 703 |
+
|
| 704 |
+
|
| 705 |
+
# 4. ALERTE SECRET
|
| 706 |
+
# if hidden_ram:
|
| 707 |
+
# kit.append(f"**⚠️ SENSORY ALERT**: Something is hidden. Use 'listen', 'search' or 'examine' on specific features.")
|
| 708 |
+
|
| 709 |
+
# 5. NAVIGATION
|
| 710 |
+
# exits = structured_data.get("visible_exits", [])
|
| 711 |
+
# kit.append(f"**NAVIGATION**: {', '.join([e.upper() for e in exits]) if exits else 'N, S, E, W, U, D, IN, OUT'}")
|
| 712 |
+
|
| 713 |
+
return "\n".join(kit)
|
| 714 |
+
|
| 715 |
+
def classify_interaction(self, action: str, result: str, observation: str, inventory: str, name_mapping: dict, possible_actions: str) -> dict:
    """Ask the LLM whether an action failed, and why.

    Sends the action/result pair plus context to the LLM and expects a
    JSON verdict back.

    Args:
        action: The command that was played.
        result: The game's textual response to that command.
        observation: Current room observation text.
        inventory: Current inventory text.
        name_mapping: {technical_name: friendly_name} for valid objects.
        possible_actions: Text listing the actions currently available.

    Returns:
        Dict with keys "is_failure" (bool), "reason" (str) and
        "suggestion" (str). Falls back to a no-failure dict when the
        LLM response is not parseable JSON.
    """
    import json

    valid_names = ", ".join([f"'{friendly}' (ID: {tech})" for tech, friendly in name_mapping.items()])

    prompt = f"""
Analyze this text adventure interaction:
Current Observation : {observation}
Inventory : {inventory}
valid objects to use: {valid_names}
ACTION: "{action}"
RESULT: "{result}"

Here is possible actions to take {possible_actions}

Is this interaction a FAILURE (e.g., object not found, too dark, locked, invalid name, generic response)?
If it's a failure, provide a short reason and a suggestion.

Return ONLY JSON:
{{
"is_failure": true/false,
"reason": "short explanation",
"suggestion": "what to try instead"
}}
"""
    # Fixed seed so classification is deterministic across calls.
    response = call_llm(prompt, "You are a Game Logic Analyzer.", seed=42)
    try:
        return json.loads(response)
    except (TypeError, ValueError):
        # Narrowed from a bare except: only malformed / non-string LLM
        # output is treated as "no failure detected"; real errors
        # (KeyboardInterrupt, etc.) now propagate.
        return {"is_failure": False, "reason": "", "suggestion": ""}
|
| 748 |
+
|
| 749 |
+
# def _build_priority_guidance(self, structured_data: dict) -> str:
|
| 750 |
+
# guidance = []
|
| 751 |
+
# loc_name = self.current_location
|
| 752 |
+
# loc_id = structured_data.get("location_id", self.last_room_id or -1)
|
| 753 |
+
# tree_display = structured_data.get("tree", [])
|
| 754 |
+
|
| 755 |
|
| 756 |
+
# # On utilise objects_in_room pour avoir la hiérarchie (parents/enfants)
|
| 757 |
+
# # Si absent, on replie sur raw_ram_objects pour la compatibilité
|
| 758 |
+
# # objects_tree = structured_data.get("objects_in_room", [])
|
| 759 |
+
# objects_tree = structured_data.get("objects_in_room", [])
|
| 760 |
+
# raw_ram_objects = structured_data.get("raw_ram_objects",[])
|
| 761 |
+
# mapping = structured_data.get("name_translation", {})
|
| 762 |
|
| 763 |
+
# past_actions = self.location_action_memory.get(loc_name, [])
|
| 764 |
+
# interacted_res = None
|
| 765 |
+
# world_changed = structured_data.get("world_changed", False)
|
| 766 |
+
# is_new_loc = structured_data.get("is_new_location", False)
|
| 767 |
+
|
| 768 |
+
|
| 769 |
+
# if self.recent_actions:
|
| 770 |
+
# last_action_taken = self.recent_actions[-1]
|
| 771 |
+
# # On récupère le dernier élément de notre liste de mémoire
|
| 772 |
+
# last_entry = past_actions[-1] if past_actions else {}
|
| 773 |
+
# last_res = last_entry.get("result", "No feedback available.")
|
| 774 |
+
|
| 775 |
+
# guidance.append("### ⚡ LAST ACTION FEEDBACK:")
|
| 776 |
+
# guidance.append(f"- Command: '{last_action_taken}'")
|
| 777 |
+
# guidance.append(f"- Full Result: '{last_res}'") # Ici on met tout le texte
|
| 778 |
+
# guidance.append(f"USE ALL THE HINT FROM THIS ACTIONS. YOU CAN EXAMINE OBJECTS IN THE RESULT. EX : Examine bowl")
|
| 779 |
+
# guidance.append("⚠️ TRUST THE CURRENT OBSERVATION TEXT OVER MEMORY FOR OBJECT NAMES. USE IT.")
|
| 780 |
+
# guidance.append("")
|
| 781 |
+
|
| 782 |
+
# if world_changed:
|
| 783 |
+
# guidance.append("- [!] SUCCESS: The world state changed. Analyze the new situation.")
|
| 784 |
+
# elif not is_new_loc:
|
| 785 |
+
# guidance.append("- [!] STASIS: No change detected. Do not repeat this exact command.")
|
| 786 |
+
# guidance.append("")
|
| 787 |
+
|
| 788 |
+
# visible_exits = structured_data.get("visible_exits", [])
|
| 789 |
+
|
| 790 |
+
# guidance.append("### 🗺️ STRATEGIC NAVIGATION:")
|
| 791 |
+
# visible_exits = structured_data.get("visible_exits", [])
|
| 792 |
+
# room_data = self.world_mapper.graph.get(loc_id, {})
|
| 793 |
+
# known_exits = room_data.get("exits", {})
|
| 794 |
+
# blocked_exits = room_data.get("blocked_exits", {})
|
| 795 |
+
|
| 796 |
+
# for direction in visible_exits:
|
| 797 |
+
# d_l = direction.lower()
|
| 798 |
+
# if d_l in blocked_exits:
|
| 799 |
+
# guidance.append(f"- {direction.upper()} : 🚫 BLOCKED ({blocked_exits[d_l]})")
|
| 800 |
+
# elif d_l in known_exits:
|
| 801 |
+
# target_id = known_exits[d_l]
|
| 802 |
+
# target_room = self.world_mapper.graph.get(str(target_id), {})
|
| 803 |
+
# t_name = target_room.get("name", "Unknown Area")
|
| 804 |
+
# status = "✅ Fully Visited" if target_room.get("items_scanned") else "🔎 Unexplored Items"
|
| 805 |
+
# guidance.append(f"- {direction.upper()} : Leads to **{t_name}** ({status})")
|
| 806 |
+
# else:
|
| 807 |
+
# guidance.append(f"- {direction.upper()} : 🌟 [NEW PATHWAY - EXPLORE THIS]")
|
| 808 |
+
# guidance.append("")
|
| 809 |
+
|
| 810 |
+
# guidance.append("\n### 🧠 GLOBAL STRATEGY & PUZZLE RADAR:")
|
| 811 |
|
| 812 |
+
# world_knowledge = []
|
| 813 |
+
# active_puzzles = []
|
|
|
|
| 814 |
|
| 815 |
+
# for r_id, r_data in self.world_mapper.graph.items():
|
| 816 |
+
# # On ne liste pas la pièce actuelle pour éviter le bruit
|
| 817 |
+
# if r_id != loc_id:
|
| 818 |
+
# # Objets laissés ailleurs
|
| 819 |
+
# if r_data.get("items"):
|
| 820 |
+
# world_knowledge.append(f"- In {r_data['name']}: {', '.join(r_data['items'])}")
|
| 821 |
+
|
| 822 |
+
# # Puzzles détectés ailleurs
|
| 823 |
+
# for p in r_data.get("puzzles", []):
|
| 824 |
+
# active_puzzles.append(f"- [{r_data['name']}] Obstacle: {p}")
|
| 825 |
+
|
| 826 |
+
# if world_knowledge:
|
| 827 |
+
# guidance.append("**Dropped/Left Items:**")
|
| 828 |
+
# guidance.extend(world_knowledge)
|
| 829 |
+
|
| 830 |
+
# print("VISIBLE EXISTS",visible_exits)
|
| 831 |
+
# if visible_exits:
|
| 832 |
+
# guidance.append("### 🚪 AVAILABLE EXITS:")
|
| 833 |
+
|
| 834 |
+
# # On extrait les directions déjà tentées dans cette pièce
|
| 835 |
+
# # On cherche des actions comme "go east", "east", "move north", etc.
|
| 836 |
+
# attempted_exits = []
|
| 837 |
+
# for entry in past_actions:
|
| 838 |
+
# action_str = entry.get("action", "").lower()
|
| 839 |
+
# for direction in visible_exits:
|
| 840 |
+
# if direction.lower() in action_str:
|
| 841 |
+
# attempted_exits.append(direction.lower())
|
| 842 |
+
|
| 843 |
+
# exit_guidance = []
|
| 844 |
+
# for direction in visible_exits:
|
| 845 |
+
# if direction.lower() in attempted_exits:
|
| 846 |
+
# exit_guidance.append(f"- {direction.upper()} (Already explored/tried)")
|
| 847 |
+
# else:
|
| 848 |
+
# exit_guidance.append(f"- {direction.upper()} 🌟 [NEW PATHWAY]")
|
| 849 |
+
|
| 850 |
+
# guidance.extend(exit_guidance)
|
| 851 |
+
|
| 852 |
+
# # Petit conseil stratégique si tout est exploré
|
| 853 |
+
# if all(d.lower() in attempted_exits for d in visible_exits) and visible_exits:
|
| 854 |
+
# guidance.append("- [TIP] All exits tried. If stuck, look for hidden objects or interact with items.")
|
| 855 |
+
# guidance.append("")
|
| 856 |
+
|
| 857 |
+
# # --- 2. HISTORIQUE CHRONOLOGIQUE (LES 10 DERNIÈRES) ---
|
| 858 |
+
# if past_actions:
|
| 859 |
+
# guidance.append(f"### 🛑 CHRONOLOGICAL HISTORY (Last 10 steps in this room):")
|
| 860 |
+
# # On prend les 10 dernières entrées de la liste
|
| 861 |
+
# for entry in past_actions[-10:-1]:
|
| 862 |
+
# act = entry.get("action")
|
| 863 |
+
# res = entry.get("result")
|
| 864 |
+
# # On affiche tout le résultat, mais ligne par ligne pour la clarté
|
| 865 |
+
# guidance.append(f"STEP {entry.get('step', '?')}: '{act}'")
|
| 866 |
+
# guidance.append(f" Result: {res}")
|
| 867 |
+
# guidance.append("")
|
| 868 |
+
|
| 869 |
+
|
| 870 |
+
# if world_changed or is_new_loc:
|
| 871 |
+
# guidance.append("### 🔥 HOT FOCUS: NEW STATE DETECTED")
|
| 872 |
+
# if is_new_loc:
|
| 873 |
+
# guidance.append(f"- [LOCATION] You just entered '{loc_name}'. All previous room logic is VOID.")
|
| 874 |
+
# if world_changed:
|
| 875 |
+
# guidance.append("- [WORLD CHANGE] The game state has mutated! Something opened, moved, or appeared.")
|
| 876 |
+
# guidance.append("- [URGENT] Acts based on the change (e.g., if something opened, look inside).")
|
| 877 |
+
|
| 878 |
+
# # --- SCAN HIERARCHIQUE (OBJETS PARENTS) ---
|
| 879 |
+
# # print("TREE DISPLAY",tree_display)
|
| 880 |
+
# # if tree_display:
|
| 881 |
+
# # guidance.append("### 🌳 ENVIRONMENT HIERARCHY (SENSORS):")
|
| 882 |
+
# # guidance.append("This is the exact structure of the room:")
|
| 883 |
+
# # for line in tree_display:
|
| 884 |
+
# # guidance.append(line)
|
| 885 |
+
# # guidance.append("")
|
| 886 |
+
|
| 887 |
+
# guidance.append(f"### 🔍 RAM SENSORS (INTELLIGENT SCAN):")
|
| 888 |
+
|
| 889 |
+
# objects_in_room = structured_data.get("objects_in_room", [])
|
| 890 |
+
# unexplored = [] # Jamais touchés
|
| 891 |
+
# failed_attempts = [] # Tentés mais avec erreur
|
| 892 |
+
# known_success = [] # Déjà examinés avec succès
|
| 893 |
+
|
| 894 |
+
# for obj_data in objects_in_room:
|
| 895 |
+
# if not isinstance(obj_data, dict):
|
| 896 |
+
# continue
|
| 897 |
+
|
| 898 |
+
# raw_name = obj_data.get("name", "Unknown")
|
| 899 |
+
# friendly_name = mapping.get(raw_name, raw_name).upper()
|
| 900 |
+
|
| 901 |
+
# # 1. Recherche de la dernière interaction
|
| 902 |
+
# last_entry = None
|
| 903 |
+
# for entry in reversed(past_actions):
|
| 904 |
+
# act = entry.get("action", "").lower()
|
| 905 |
+
# if raw_name.lower() in act or friendly_name.lower() in act:
|
| 906 |
+
# last_entry = entry
|
| 907 |
+
# break
|
| 908 |
+
|
| 909 |
+
# # 2. Construction de l'info de contenu (Hiérarchie)
|
| 910 |
+
# content_list = obj_data.get("contents", [])
|
| 911 |
+
# content_str = f" [Contains: {', '.join([mapping.get(c['name'], c['name']).upper() for c in content_list])}]" if content_list else ""
|
| 912 |
+
# display_line = f"- {friendly_name}{content_str}"
|
| 913 |
+
|
| 914 |
+
# # 3. Classification par LLM si une interaction existe
|
| 915 |
+
# if last_entry:
|
| 916 |
+
# action_tried = last_entry.get("action")
|
| 917 |
+
# result_received = last_entry.get("result")
|
| 918 |
+
|
| 919 |
+
# # On demande au LLM si c'est un échec
|
| 920 |
+
# analysis = self.classify_interaction(action_tried, result_received)
|
| 921 |
+
|
| 922 |
+
# if analysis.get("is_failure"):
|
| 923 |
+
# # C'est un échec : on le met en priorité pour correction
|
| 924 |
+
# fail_msg = f"{display_line}\n ⚠️ FAILED: '{action_tried}' -> {analysis['reason']}\n 💡 SUGGESTION: {analysis['suggestion']}"
|
| 925 |
+
# failed_attempts.append(fail_msg)
|
| 926 |
+
# else:
|
| 927 |
+
# # C'est un succès : on le met dans les acquis
|
| 928 |
+
# known_success.append(f"{display_line} | ✅ Known: {result_received[:60]}...")
|
| 929 |
+
# else:
|
| 930 |
+
# # Jamais tenté
|
| 931 |
+
# unexplored.append(display_line)
|
| 932 |
+
|
| 933 |
+
# # --- AFFICHAGE HIÉRARCHISÉ ---
|
| 934 |
+
|
| 935 |
+
# if failed_attempts:
|
| 936 |
+
# guidance.append("### ⚠️ ACTIONS TO CORRECT (FAILED ATTEMPTS):")
|
| 937 |
+
# guidance.extend(failed_attempts)
|
| 938 |
+
# guidance.append("")
|
| 939 |
+
|
| 940 |
+
# if unexplored:
|
| 941 |
+
# guidance.append("### ✨ NEW / UNEXPLORED IN ROOM:")
|
| 942 |
+
# guidance.extend(unexplored)
|
| 943 |
+
# guidance.append("")
|
| 944 |
+
|
| 945 |
+
# if known_success:
|
| 946 |
+
# guidance.append("### ✅ ALREADY INTERACTED (SUCCESS):")
|
| 947 |
+
# guidance.extend(known_success)
|
| 948 |
+
# guidance.append("")
|
| 949 |
+
|
| 950 |
+
# # --- GUIDANCE STRATÉGIQUE MISE À JOUR ---
|
| 951 |
+
# guidance.append("\n**STRATEGY GUIDANCE**:")
|
| 952 |
+
# if failed_attempts:
|
| 953 |
+
# guidance.append("- [RECOVERY] Prioritize correcting FAILED actions. Do not repeat the same command; follow the suggestion.")
|
| 954 |
+
# guidance.append("- [PHYSICS] If an item is inside a container, you MUST 'OPEN' or 'EXAMINE' the parent first.")
|
| 955 |
+
# guidance.append("- [EXPLORATION] Do not leave this room until all 'NEW' and 'FAILED' items are resolved.")
|
| 956 |
+
|
| 957 |
+
# # --- DÉTECTION DES SECRETS ---
|
| 958 |
+
# # On utilise raw_ram_objects ici car c'est une liste de strings simple
|
| 959 |
+
# raw_names_list = structured_data.get("raw_ram_objects", [])
|
| 960 |
+
# has_secrets = any("(missing" in o.lower() for o in raw_names_list)
|
| 961 |
+
# if has_secrets:
|
| 962 |
+
# guidance.append("### ⚠️ SENSORY ANOMALY:")
|
| 963 |
+
# guidance.append("- [GOD-MODE] Hidden structures detected. Use 'SEARCH' or 'EXAMINE' on the scenery.")
|
| 964 |
+
|
| 965 |
+
|
| 966 |
+
# clues = " ".join(structured_data.get("puzzle_clues", [])).lower()
|
| 967 |
+
# if any(k in clues for k in ["noise", "hear", "sound"]):
|
| 968 |
+
# guidance.append("- [AUDIO] Noise detected! Use 'listen'.")
|
| 969 |
+
|
| 970 |
+
# if is_new_loc:
|
| 971 |
+
# guidance.append("### 🚀 NEW AREA PROTOCOL: 1. 'look', 2. 'examine' new objects, 3. 'listen', 4. explore exits.")
|
| 972 |
+
|
| 973 |
+
# return "\n".join(guidance)
|
| 974 |
+
|
| 975 |
+
def _build_priority_guidance(self, structured_data: dict) -> str:
    """Build the per-turn priority guidance block injected into the actor prompt.

    Combines: (0) a stagnation alert with exit suggestions, (1) feedback on
    the last executed action, (2) the WorldMapper's strategic summary, (3) a
    RAM-level scan of the objects in the current room, and (4) a short
    history of commands issued in this room.

    Args:
        structured_data: Extracted observation dict (keys such as
            "location_id", "inventory", "objects_in_room", "name_translation").

    Returns:
        A newline-joined guidance string for the LLM prompt.
    """
    # BUGFIX: this constant used to be assigned at the *end* of the method,
    # which made the read below raise UnboundLocalError on every call
    # (any assignment in a function makes the name function-local).
    MAX_STEPS_ALLOWED = 8

    guidance = []
    loc_name = self.current_location
    loc_id = str(structured_data.get("location_id", self.last_room_id or -1))
    mapping = structured_data.get("name_translation", {})
    past_actions = self.location_action_memory.get(loc_name, [])

    current_inventory = str(structured_data.get("inventory", "Unknown"))
    current_obs = structured_data.get("description_summary", "No description")
    enriched_actions = self._generate_enriched_actions(structured_data)

    # ============================================================
    # 0. STAGNATION ALERT: too many turns spent in the same room.
    # ============================================================
    if self.steps_in_current_room >= MAX_STEPS_ALLOWED:
        guidance.append("\n### 🚀 URGENT STRATEGIC DIRECTIVE:")
        guidance.append(f"- [STAGNATION ALERT] {self.steps_in_current_room} turns in this room.")

        known_room = self.world_mapper.graph.get(loc_id, {})
        potential = known_room.get("potential_exits", [])
        already_linked = known_room.get("exits", {})  # Dict: {direction: target_id}

        new_paths = [p for p in potential if p not in already_linked]

        if new_paths:
            # Case 1: untested exits exist -> push the agent toward them.
            guidance.append(f"- [ACTION] Move to a NEW area. Priority: {', '.join(new_paths).upper()}")
        elif already_linked:
            # Case 2: every exit is already mapped -> suggest leaving.
            guidance.append("- [ACTION] Room exhausted. Backtrack or move to a known adjacent room.")
            directions_list = [d.upper() for d in already_linked.keys()]
            guidance.append(f"- [HINT] Known exits: {', '.join(directions_list)}")
        else:
            # Case 3: no exit known at all -> hidden-passage protocol.
            guidance.append("- [CRITICAL] No exits found in memory or observation.")
            guidance.append("- [ACTION] Use 'SEARCH', 'LISTEN', or 'EXAMINE' on the scenery to find hidden passages.")
            guidance.append("- [HINT] Try common directions anyway: NORTH, SOUTH, EAST, WEST,NORTHEAST,NORTHWEST,SOUTHEAST,SOUTHWEST, UP, DOWN.")

    # ============================================================
    # 1. IMMEDIATE FEEDBACK (what just happened)
    # ============================================================
    if self.recent_actions and past_actions:
        last_action_taken = self.recent_actions[-1]
        last_entry = past_actions[-1]

        guidance.append("### ⚡ LAST ACTION FEEDBACK:")
        guidance.append(f"- Command: '{last_action_taken}'")

        # Classify the action that was just executed.
        immediate_analysis = self.classify_interaction(
            action=last_entry['action'],
            result=last_entry['result'],
            observation=current_obs,
            inventory=current_inventory,
            name_mapping=mapping,
            possible_actions=enriched_actions
        )

        if immediate_analysis.get("is_failure"):
            guidance.append(f"- [!] STATUS: FAILURE")
            guidance.append(f"- [!] REASON: {immediate_analysis['reason']}")
            guidance.append(f"- [!] SUGGESTION: {immediate_analysis['suggestion']}")
        elif structured_data.get("world_changed"):
            guidance.append("- [!] STATUS: SUCCESS (World state updated)")
        else:
            guidance.append(f"- [!] STATUS: NEUTRAL / INFO: {last_entry.get('result', '')}")

    guidance.append("IF ANOTHER ENTITIES IS INTERESTED BY AN ITEM IT'S A HINT. EX : pig climb fountain -> fountain must be an important object to examine or search")

    # ============================================================
    # 2. STRATEGIC VISION (delegated to the WorldMapper)
    # ============================================================
    strategic_summary = self.world_mapper.generate_summary(loc_id)
    guidance.append(strategic_summary)
    guidance.append("")

    # ============================================================
    # 3. LOCAL RAM SCAN (details of the objects in this room)
    # ============================================================
    guidance.append("### 🔍 LOCAL OBJECT SCAN (RAM):")
    objects_in_room = structured_data.get("objects_in_room", [])

    for obj_data in objects_in_room:
        if not isinstance(obj_data, dict):
            continue
        raw_name = obj_data.get("name", "Unknown")

        # Empty translation means the RAM name never appeared in the text.
        friendly_name = mapping.get(raw_name, "").upper()
        present = friendly_name != ""

        # Find the most recent interaction mentioning this object (by
        # either its raw RAM name or its friendly in-text name).
        last_obj_entry = None
        for entry in reversed(past_actions):
            act = entry.get("action", "").lower()
            found_raw = raw_name and raw_name.lower() in act
            found_friendly = friendly_name and friendly_name.lower() in act
            if found_raw or found_friendly:
                last_obj_entry = entry
                break

        # Containment hint. NOTE: the original computed this twice; the
        # first (mapping-based) version was dead code, always overwritten
        # by the raw-name version kept here.
        contents = obj_data.get("contents", [])
        content_str = ""
        if contents:
            child_names = [c.get("name", "Unknown").upper() for c in contents]
            content_str = f" [Contains: {', '.join(child_names)}]"

        if present:
            display_line = f"- {friendly_name}{content_str}"
        else:
            display_line = f"- {raw_name}{content_str} [!] Technical Name use the real name or a full word. Ex : fountabowl -> bowl, brokstair -> stairs ... CHECK YOUR HISTORY AND OBSERVATION"

        if last_obj_entry:
            last_act_text = last_obj_entry['action']

            # Re-classify that past interaction in the current context.
            analysis = self.classify_interaction(
                action=last_obj_entry['action'],
                result=last_obj_entry['result'],
                observation=current_obs,
                inventory=current_inventory,
                name_mapping=mapping,
                possible_actions=self._generate_enriched_actions(structured_data)
            )

            if analysis.get("is_failure"):
                guidance.append(f"{display_line}\n ❌ Last tried: '{last_act_text}' -> {analysis['reason']}")
                guidance.append(f" 💡 Suggestion: {analysis['suggestion']}")
            else:
                guidance.append(f"{display_line} | ✅ Last: '{last_act_text}' (Success) {last_act_text}")
        else:
            # Never touched: only surface it if it is visible or has contents.
            if present or contents:
                guidance.append(f"{display_line} | ✨ UNEXPLORED")

    # ============================================================
    # 4. CHRONOLOGY (recent command history for this room)
    # ============================================================
    if past_actions:
        guidance.append(f"\n### 🛑 ROOM HISTORY (Last steps):")
        for entry in past_actions[-10:]:
            guidance.append(f"- '{entry.get('action')}' -> {entry.get('result')}...")

    return "\n".join(guidance)
|
| 1144 |
+
|
| 1145 |
+
def _build_prompt(self, observation: str,knowledge:str) -> str:
|
| 1146 |
+
"""Build the prompt for the LLM with context."""
|
| 1147 |
+
parts = []
|
| 1148 |
+
|
| 1149 |
+
# strategy_text = self.strategist.get_strategy_context()
|
| 1150 |
+
# parts.append(strategy_text)
|
| 1151 |
+
# parts.append("-" * 40)
|
| 1152 |
+
|
| 1153 |
+
parts.append(f"Current Score: {self.score}")
|
| 1154 |
+
|
| 1155 |
+
# Recent history
|
| 1156 |
+
if self.history:
|
| 1157 |
+
parts.append("\nRecent actions:")
|
| 1158 |
+
for entry in self.history[-3:]:
|
| 1159 |
+
action = entry.get("args", {}).get("action", entry["tool"])
|
| 1160 |
+
result_short = entry["result"][:80] + "..." if len(entry["result"]) > 80 else entry["result"]
|
| 1161 |
+
parts.append(f" > {action} -> {result_short}")
|
| 1162 |
+
|
| 1163 |
+
# Warn about repeated actions
|
| 1164 |
+
if self.recent_actions and len(set(self.recent_actions[-3:])) == 1:
|
| 1165 |
+
parts.append(f"\n[WARNING: You've been doing '{self.recent_actions[-1]}' repeatedly. TRY SOMETHING DIFFERENT!]")
|
| 1166 |
+
|
| 1167 |
+
# if knowledge:
|
| 1168 |
+
# parts.append("\n=== ACQUIRED STRATEGIC KNOWLEDGE ===")
|
| 1169 |
+
# parts.append(knowledge)
|
| 1170 |
+
# parts.append("====================================\n")
|
| 1171 |
+
|
| 1172 |
+
parts.append(f"\nCURRENT SITUATION:\n{observation}")
|
| 1173 |
+
parts.append("\nWhat is your next specific command?")
|
| 1174 |
+
|
| 1175 |
+
return "\n".join(parts)
|
| 1176 |
+
|
| 1177 |
+
def _parse_response(self, response: str, valid_tools: list[str]) -> tuple[str, str, dict]:
|
| 1178 |
+
"""Parse the LLM response to extract thought, tool, and arguments."""
|
| 1179 |
+
thought = "No reasoning provided"
|
| 1180 |
+
tool_name = "play_action"
|
| 1181 |
+
tool_args = {"action": "look"}
|
| 1182 |
+
|
| 1183 |
+
lines = response.strip().split("\n")
|
| 1184 |
+
|
| 1185 |
+
for line in lines:
|
| 1186 |
+
line_clean = line.strip()
|
| 1187 |
+
line_upper = line_clean.upper()
|
| 1188 |
+
|
| 1189 |
+
if line_upper.startswith("THOUGHT:"):
|
| 1190 |
+
thought = line_clean.split(":", 1)[1].strip()
|
| 1191 |
+
|
| 1192 |
+
elif line_upper.startswith("TOOL:"):
|
| 1193 |
+
raw_tool = line_clean.split(":", 1)[1].strip().lower()
|
| 1194 |
+
raw_tool = raw_tool.replace("**", "").replace("*", "").replace("`", "")
|
| 1195 |
+
raw_tool = raw_tool.split()[0] if raw_tool else "play_action"
|
| 1196 |
+
tool_name = raw_tool
|
| 1197 |
+
|
| 1198 |
+
elif line_upper.startswith("ARGS:"):
|
| 1199 |
+
args_part = line_clean.split(":", 1)[1].strip()
|
| 1200 |
+
try:
|
| 1201 |
+
args_part = args_part.replace("'", '"')
|
| 1202 |
+
tool_args = json.loads(args_part)
|
| 1203 |
+
except json.JSONDecodeError:
|
| 1204 |
+
match = re.search(r'"action"\s*:\s*"([^"]+)"', args_part)
|
| 1205 |
+
if match:
|
| 1206 |
+
tool_args = {"action": match.group(1)}
|
| 1207 |
+
else:
|
| 1208 |
+
tool_args = {"action": "look"}
|
| 1209 |
+
|
| 1210 |
+
return thought, tool_name, tool_args
|
| 1211 |
+
|
| 1212 |
+
def _validate_tool_call(self, tool_name: str, tool_args: dict, valid_tools: list[str]) -> tuple[str, dict]:
|
| 1213 |
+
"""Validate and fix common tool call issues."""
|
| 1214 |
+
# Fix tool name
|
| 1215 |
+
if tool_name not in valid_tools:
|
| 1216 |
+
if tool_name in ["action", "do", "command"]:
|
| 1217 |
+
tool_name = "play_action"
|
| 1218 |
+
elif tool_name in ["map", "location"]:
|
| 1219 |
+
tool_name = "get_map"
|
| 1220 |
+
elif tool_name in ["mem", "state", "status"]:
|
| 1221 |
+
tool_name = "memory"
|
| 1222 |
+
elif tool_name in ["inv", "items"]:
|
| 1223 |
+
tool_name = "inventory"
|
| 1224 |
+
else:
|
| 1225 |
+
tool_name = "play_action"
|
| 1226 |
+
|
| 1227 |
+
# Fix action verbs
|
| 1228 |
+
if tool_name == "play_action":
|
| 1229 |
+
action = str(tool_args.get("action", "look")).lower().strip()
|
| 1230 |
+
direction = tool_args.get("direction")
|
| 1231 |
+
|
| 1232 |
+
# Fusion de la direction si le LLM l'a mise à part
|
| 1233 |
+
if direction and str(direction).lower() not in action:
|
| 1234 |
+
action = f"{action} {direction}"
|
| 1235 |
+
|
| 1236 |
+
nav_map = {
|
| 1237 |
+
"north": "n", "south": "s", "east": "e", "west": "w",
|
| 1238 |
+
"northeast": "ne", "northwest": "nw",
|
| 1239 |
+
"southeast": "se", "southwest": "sw",
|
| 1240 |
+
"up": "u", "down": "d"
|
| 1241 |
+
}
|
| 1242 |
+
|
| 1243 |
+
invalid_verb_map = {
|
| 1244 |
+
"check": "examine",
|
| 1245 |
+
"inspect": "examine",
|
| 1246 |
+
"search": "look",
|
| 1247 |
+
"grab": "take",
|
| 1248 |
+
"pick": "take",
|
| 1249 |
+
"use": "examine",
|
| 1250 |
+
"investigate": "examine",
|
| 1251 |
+
}
|
| 1252 |
+
|
| 1253 |
+
if action.startswith("go "):
|
| 1254 |
+
action = action.replace("go ", "").strip()
|
| 1255 |
+
|
| 1256 |
+
words = action.split()
|
| 1257 |
+
if words:
|
| 1258 |
+
if words[0] in invalid_verb_map:
|
| 1259 |
+
words[0] = invalid_verb_map[words[0]]
|
| 1260 |
+
action = " ".join(words)
|
| 1261 |
+
|
| 1262 |
+
if words[0] == "examine":
|
| 1263 |
+
words = words[:2]
|
| 1264 |
+
action = " ".join(words)
|
| 1265 |
+
else:
|
| 1266 |
+
action = " ".join(words)
|
| 1267 |
+
|
| 1268 |
+
if action in nav_map:
|
| 1269 |
+
action = nav_map[action]
|
| 1270 |
+
|
| 1271 |
+
action = action.replace("**", "").replace("*", "").replace("`", "")
|
| 1272 |
+
action = " ".join(action.split())
|
| 1273 |
+
|
| 1274 |
+
return tool_name, {"action": action}
|
| 1275 |
+
|
| 1276 |
+
return tool_name, tool_args
|
| 1277 |
+
|
| 1278 |
+
def _extract_result(self, result) -> str:
|
| 1279 |
+
"""Extract text from MCP tool result."""
|
| 1280 |
+
if hasattr(result, 'content') and result.content:
|
| 1281 |
+
return result.content[0].text
|
| 1282 |
+
if isinstance(result, list) and result:
|
| 1283 |
+
return result[0].text if hasattr(result[0], 'text') else str(result[0])
|
| 1284 |
+
return str(result)
|
| 1285 |
+
|
| 1286 |
+
def _update_score(self, text: str) -> None:
|
| 1287 |
+
"""Update score from game text."""
|
| 1288 |
+
patterns = [
|
| 1289 |
+
r'Score:\s*(\d+)',
|
| 1290 |
+
r'score[:\s]+(\d+)',
|
| 1291 |
+
r'\[Score:\s*(\d+)',
|
| 1292 |
+
]
|
| 1293 |
+
|
| 1294 |
+
for pattern in patterns:
|
| 1295 |
+
match = re.search(pattern, text, re.IGNORECASE)
|
| 1296 |
+
if match:
|
| 1297 |
+
self.score = max(self.score, int(match.group(1)))
|
| 1298 |
+
|
| 1299 |
+
def _is_game_over(self, text: str) -> bool:
|
| 1300 |
+
"""Check if the game is over."""
|
| 1301 |
+
game_over_phrases = [
|
| 1302 |
+
"game over",
|
| 1303 |
+
"you have died",
|
| 1304 |
+
"you are dead",
|
| 1305 |
+
"*** you have died ***",
|
| 1306 |
+
]
|
| 1307 |
+
text_lower = text.lower()
|
| 1308 |
+
return any(phrase in text_lower for phrase in game_over_phrases)
|
| 1309 |
|
| 1310 |
|
| 1311 |
# =============================================================================
|
| 1312 |
+
# Local Testing
|
| 1313 |
# =============================================================================
|
| 1314 |
|
| 1315 |
async def test_agent():
    """Test the agent locally."""
    from fastmcp import Client

    student = StudentAgent()

    # Run a short episode against the local MCP server.
    async with Client("mcp_server.py") as mcp_client:
        outcome = await student.run(
            client=mcp_client,
            game="zork1",
            max_steps=20,
            seed=42,
            verbose=True,
        )

        # Episode summary.
        print(f"\n{'=' * 50}")
        print(f"Final Score: {outcome.final_score}")
        print(f"Moves: {outcome.moves}")
        print(f"Locations: {len(outcome.locations_visited)}")
|
| 1334 |
+
|
| 1335 |
+
|
| 1336 |
+
class StrategyModule:
    """High-level planner (the 'Brain'): produces and caches a strategic plan."""

    def __init__(self):
        # Most recent parsed plan dict, or None before the first success.
        self.current_plan = None
        # Step index at which the plan was last refreshed.
        self.last_update_step = 0

    def generate_plan(self, observation: str, history: list, step: int, knowledge: str) -> dict:
        """Generate or refresh the strategic plan via a dedicated LLM call.

        Returns the parsed plan dict, or None when the planner's output
        could not be parsed as JSON.
        """
        # Condense the recent trajectory for the planner.
        recap = [f"- {h['thought']} -> {h['result'][:50]}..." for h in history[-5:]]
        history_summary = "\n".join(recap)

        prompt = f"""
CURRENT SITUATION:
{observation}

RECENT HISTORY:
{history_summary}

ACQUIRED KNOWLEDGE (Tips & Rules from previous games):
{knowledge if knowledge else "No prior knowledge available."}

TASK:
Based on the Situation and Knowledge, create a strategic plan.
If the Knowledge says "Trolls fear swords", and you see a Troll, your plan must be "Find sword".
"""

        # Dedicated strategy LLM call (seeded per step for variety).
        response = call_llm(prompt, PLANNER_SYSTEM_PROMPT, seed=step, max_tokens=400)

        try:
            # Strip markdown fences in case the model wrapped its JSON.
            payload = response.strip()
            if "```json" in payload:
                payload = payload.split("```json")[1].split("```")[0]
            elif "```" in payload:
                payload = payload.split("```")[1].split("```")[0]

            self.current_plan = json.loads(payload)
            self.last_update_step = step
            return self.current_plan
        except Exception as e:
            print(f"[Strategy Error] Failed to parse plan: {e}")
            return None

    def get_strategy_context(self) -> str:
        """Return the guidance text to inject into the Actor's prompt."""
        if not self.current_plan:
            return "NO ACTIVE PLAN. Explore cautiously."

        return f"""
*** STRATEGIC GUIDANCE ***
CURRENT OBJECTIVE: {self.current_plan.get('current_objective', 'Unknown')}
STRATEGIC REASONING: {self.current_plan.get('reasoning', 'None')}
STEPS TO TAKE:
{chr(10).join(['- ' + s for s in self.current_plan.get('suggested_steps', [])])}
"""
|
| 1397 |
+
|
| 1398 |
+
##### CRITIC Agent
|
| 1399 |
+
|
| 1400 |
+
@dataclass
class CriticResponse:
    """Verdict returned by the critic's LLM evaluation of a proposed action."""
    # Confidence in [0, 1] that the action is worthwhile (low = reject).
    score: float
    # Short human-readable explanation of the verdict.
    justification: str
    # True when the critic believes the action could kill the player.
    is_fatal: bool = False
|
| 1405 |
+
|
| 1406 |
+
class ActionHistoryTracker:
    """Tracks recent actions to detect loops and remember failed attempts."""

    def __init__(self):
        # Rolling window (max 20) of the latest actions, oldest first.
        self.recent_actions: List[str] = []
        # Location recorded alongside each entry of recent_actions.
        self.location_history: List[str] = []
        # Per-location set of actions whose game reply looked like a failure.
        self.failed_actions_per_location: Dict[str, Set[str]] = {}

    def update(self, action: str, location: str, result: str):
        """Record an executed action and classify its textual outcome."""
        self.recent_actions.append(action)
        self.location_history.append(location)

        # Cheap text-based failure detection on the interpreter's reply.
        failure_markers = ["can't", "don't", "nothing happens", "impossible", "failed", "no such"]
        lowered = result.lower()
        if any(marker in lowered for marker in failure_markers):
            self.failed_actions_per_location.setdefault(location, set()).add(action)

        # Keep both parallel histories bounded to the last 20 entries.
        if len(self.recent_actions) > 20:
            self.recent_actions.pop(0)
            self.location_history.pop(0)

    def is_looping(self, proposed_action: str) -> bool:
        """True when the proposed action would be the 4th identical in a row."""
        tail = self.recent_actions[-3:]
        return len(tail) == 3 and all(previous == proposed_action for previous in tail)

    def is_known_failure(self, proposed_action: str, current_location: str) -> bool:
        """True when this exact action already failed at this location."""
        return proposed_action in self.failed_actions_per_location.get(current_location, set())
|
| 1445 |
+
|
| 1446 |
+
class CriticAgent:
|
| 1447 |
+
"""
|
| 1448 |
+
Le module critique qui valide les actions avant exécution.
|
| 1449 |
+
"""
|
| 1450 |
+
def __init__(self,call_llm_func, verbose: bool = False):
|
| 1451 |
+
self.history_tracker = ActionHistoryTracker()
|
| 1452 |
+
self.verbose = verbose
|
| 1453 |
+
self.call_llm = call_llm_func
|
| 1454 |
+
|
| 1455 |
+
def check_heuristics(self, action: str, current_location: str,valid_exits:list[str]) -> tuple[bool, str]:
|
| 1456 |
+
"""
|
| 1457 |
+
Vérification rapide basée sur des règles (Pas de LLM).
|
| 1458 |
+
Retourne (Est_Valid, Raison).
|
| 1459 |
+
"""
|
| 1460 |
+
# 1. Vérifier si c'est une action vide
|
| 1461 |
+
if not action or len(action.strip()) < 2:
|
| 1462 |
+
return False, "Action too short or empty"
|
| 1463 |
+
|
| 1464 |
+
# 2. Vérifier les boucles immédiates
|
| 1465 |
+
if self.history_tracker.is_looping(action):
|
| 1466 |
+
return False, "Detected infinite loop (action repeated too many times)"
|
| 1467 |
+
|
| 1468 |
+
# 3. Vérifier les échecs connus (Memory-based rejection)
|
| 1469 |
+
if self.history_tracker.is_known_failure(action, current_location):
|
| 1470 |
+
return False, f"Action '{action}' previously failed in this location"
|
| 1471 |
+
|
| 1472 |
+
directions = ["north", "south", "east", "west", "up", "down",
|
| 1473 |
+
"n", "s", "e", "w", "u", "d", "ne", "nw", "se", "sw"]
|
| 1474 |
+
|
| 1475 |
+
action_word = action.lower().strip()
|
| 1476 |
+
|
| 1477 |
+
if action_word in directions and valid_exits:
|
| 1478 |
+
|
| 1479 |
+
is_possible = False
|
| 1480 |
+
for exit_name in valid_exits:
|
| 1481 |
+
if action_word in exit_name.lower() or exit_name.lower().startswith(action_word):
|
| 1482 |
+
is_possible = True
|
| 1483 |
+
break
|
| 1484 |
+
|
| 1485 |
+
if not is_possible:
|
| 1486 |
+
return False, f"You can't go '{action_word}'. Visible exits are: {valid_exits}"
|
| 1487 |
+
|
| 1488 |
+
return True, "Heuristics passed"
|
| 1489 |
+
|
| 1490 |
+
return True, "Heuristics passed"
|
| 1491 |
+
|
| 1492 |
+
def evaluate_with_llm(self, action: str, observation: str, inventory: str, seed: int) -> CriticResponse:
|
| 1493 |
+
"""
|
| 1494 |
+
Évaluation sémantique lente via LLM.
|
| 1495 |
+
"""
|
| 1496 |
+
# Construction du prompt
|
| 1497 |
+
prompt = f"""
|
| 1498 |
+
OBSERVATION:
|
| 1499 |
+
{observation[:1000]}...
|
| 1500 |
+
|
| 1501 |
+
INVENTORY:
|
| 1502 |
+
{inventory}
|
| 1503 |
+
|
| 1504 |
+
PROPOSED ACTION:
|
| 1505 |
+
{action}
|
| 1506 |
+
|
| 1507 |
+
Evaluate this action.
|
| 1508 |
+
"""
|
| 1509 |
+
|
| 1510 |
+
try:
|
| 1511 |
+
response_text = self.call_llm(prompt, CRITIC_SYSTEM_PROMPT, seed=seed, max_tokens=150)
|
| 1512 |
+
|
| 1513 |
+
# Parsing JSON résilient
|
| 1514 |
+
json_str = response_text.strip()
|
| 1515 |
+
if "```json" in json_str:
|
| 1516 |
+
json_str = json_str.split("```json")[1].split("```")[0]
|
| 1517 |
+
elif "```" in json_str:
|
| 1518 |
+
json_str = json_str.split("```")[1].split("```")[0]
|
| 1519 |
+
|
| 1520 |
+
data = json.loads(json_str)
|
| 1521 |
+
return CriticResponse(
|
| 1522 |
+
score=float(data.get("score", 0.5)),
|
| 1523 |
+
justification=data.get("justification", "No reason provided"),
|
| 1524 |
+
is_fatal=data.get("is_fatal", False)
|
| 1525 |
+
)
|
| 1526 |
+
|
| 1527 |
+
except Exception as e:
|
| 1528 |
+
if self.verbose:
|
| 1529 |
+
print(f"[Critic Error] LLM validation failed: {e}")
|
| 1530 |
+
# En cas d'erreur, on laisse passer (fail open)
|
| 1531 |
+
return CriticResponse(score=1.0, justification="Validation failed, allowing action")
|
| 1532 |
+
|
| 1533 |
+
def critique_action(self, action: str, observation: str, inventory: str, current_location: str, seed: int,valid_exits: list[str] = None) -> bool:
|
| 1534 |
+
"""
|
| 1535 |
+
Méthode principale à appeler depuis l'agent.
|
| 1536 |
+
Retourne True si l'action est acceptée, False sinon.
|
| 1537 |
+
"""
|
| 1538 |
+
# 1. Filtre Heuristique (Rapide & Gratuit)
|
| 1539 |
+
is_valid, reason = self.check_heuristics(action, current_location,valid_exits)
|
| 1540 |
+
if not is_valid:
|
| 1541 |
+
if self.verbose:
|
| 1542 |
+
print(f"🛑 [CRITIC REJECT - RULE] {reason}")
|
| 1543 |
+
return False
|
| 1544 |
+
|
| 1545 |
+
# 2. Filtre LLM (Lent & Coûteux - on peut l'activer seulement pour les actions complexes)
|
| 1546 |
+
# Pour optimiser, on ne vérifie pas les mouvements simples (north, south, etc.)
|
| 1547 |
+
simple_moves = ["north", "south", "east", "west", "up", "down", "look", "inventory"]
|
| 1548 |
+
if action.lower() in simple_moves:
|
| 1549 |
+
return True
|
| 1550 |
+
|
| 1551 |
+
# Appel LLM pour les actions complexes (take, attack, open...)
|
| 1552 |
+
evaluation = self.evaluate_with_llm(action, observation, inventory, seed)
|
| 1553 |
+
|
| 1554 |
+
if evaluation.score < 0.4 or evaluation.is_fatal:
|
| 1555 |
+
if self.verbose:
|
| 1556 |
+
print(f"🛑 [CRITIC REJECT - LLM] Score: {evaluation.score} | Reason: {evaluation.justification}")
|
| 1557 |
+
return False
|
| 1558 |
+
|
| 1559 |
+
return True
|
| 1560 |
+
|
| 1561 |
+
def record_result(self, action: str, current_location: str, result_text: str):
|
| 1562 |
+
"""Met à jour la mémoire du critique après l'exécution."""
|
| 1563 |
+
self.history_tracker.update(action, current_location, result_text)
|
| 1564 |
+
|
| 1565 |
+
|
| 1566 |
+
#### Extractor of data
|
| 1567 |
+
|
| 1568 |
+
class StructuredObservation(BaseModel):
    """Schema for the extractor's structured view of one game observation."""
    location_id: int  # RAM id of the current room
    location_name: str  # Human-readable room name
    is_new_location: bool  # True on the first visit to this room
    world_changed: bool  # Derived from the world-state hash
    description_summary: str  # Condensed room description
    takeable_objects: List[str]  # Objects the player could pick up
    visible_exits: List[str]  # Exit directions mentioned in the text
    interactable_features: List[str]  # Scenery that can be manipulated
    puzzle_clues: List[str]  # Hints extracted from the text / RAM
    entities: List[str]  # NPCs / creatures present
    in_combat: bool  # True while a fight is ongoing
    raw_ram_objects: List[str]  # Raw object names read from RAM
    name_translation: Dict[str, str]  # RAM name -> friendly in-text name
|
| 1582 |
+
|
| 1583 |
+
|
| 1584 |
+
class ObservationExtractor:
    """Turns raw game text plus RAM-derived data into a structured dict.

    Relies on an injected LLM callable for semantic extraction and falls
    back to a purely mechanical scan when the LLM output cannot be parsed.
    """

    def __init__(self, call_llm_func):
        # Injected LLM callable: (prompt, system_prompt, seed=...) -> str
        self.call_llm = call_llm_func

    def extract(self, raw_text: str, seed: int, ram_data: dict, last_location: str = "Unknown") -> dict:
        """
        Convert raw game text and structured RAM data into an observation dict.

        Args:
            raw_text: The interpreter's textual output for this turn.
            seed: Seed forwarded to the LLM call for reproducibility.
            ram_data: Dict with "location", "inventory", "detected_objects".
            last_location: Fallback room name when RAM has none.

        Returns:
            A dict roughly following the StructuredObservation schema; on
            LLM/parsing failure, a minimal fallback dict built without the LLM.
        """
        inventory_raw = ram_data.get("inventory", [])
        inventory_names = [item["name"] for item in inventory_raw]

        # 1. Safe extraction of nested RAM fields.
        location_info = ram_data.get("location", {})
        curr_id = location_info.get("id", -1)
        curr_name = location_info.get("name", last_location)

        # Object names come as a list of dicts: [{'name': ..., 'contents': [...]}, ...]
        detected_objs_raw_all = ram_data.get("detected_objects", [])
        inventory_names_l = [item["name"].lower() for item in ram_data.get("inventory", [])]

        # First player-filter pass: drop the container that represents the
        # player (recognized by name, or because it holds our inventory).
        detected_objs_raw = []
        for obj in detected_objs_raw_all:
            name_l = obj["name"].lower()
            content_names_l = [c["name"].lower() for c in obj.get("contents", [])]

            is_player = (
                name_l in ["inconnu", "self", "player", "me", "grunk"] or
                (content_names_l and any(inv_item in content_names_l for inv_item in inventory_names_l))
            )

            if not is_player:
                detected_objs_raw.append(obj)

        inventory_data = ram_data.get("inventory", [])
        inv_names_lower = [str(item.get("name", "")).lower() for item in inventory_data]

        objects_in_ram = []
        tree_view = []

        # Second pass: flatten the parent/contents hierarchy into a name list
        # and a textual tree view, re-checking (more broadly) for the player
        # container. NOTE(review): this repeats much of the first filter with
        # extra conditions — kept as-is to preserve behavior.
        for parent in detected_objs_raw:
            p_name = parent["name"]
            p_contents = parent.get("contents", [])
            p_content_names = [c["name"] for c in p_contents]

            # Normalize to lowercase for comparisons.
            p_name_l = p_name.lower()
            p_content_names_l = [n.lower() for n in p_content_names]

            # Broader player-container detection.
            is_player_container = (
                p_name_l in ["inconnu", "self", "player", "me", "inventory", "grunk"] or
                # Any inventory item found inside this container...
                any(inv_item in p_content_names_l for inv_item in inv_names_lower) or
                # ...or the container itself is an inventory item.
                p_name_l in inv_names_lower
            )

            if is_player_container:
                continue

            # A genuine scenery/world object.
            objects_in_ram.append(p_name)

            if p_contents:
                c_names = [c["name"] for c in p_contents]
                objects_in_ram.extend(c_names)
                tree_view.append(f"- {p_name} (contains: {', '.join(c_names)})")
            else:
                tree_view.append(f"- {p_name}")

        # 3. Build the extraction prompt. Objects tagged "(missing...)" in
        # RAM are treated as puzzle hints further below.
        prompt = f"""
RAW GAME TEXT:
{raw_text}

--- RAM DATA (TECHNICAL TRUTH) ---
CURRENT LOCATION ID: {curr_id}
OBJECTS DETECTED IN RAM: {", ".join(objects_in_ram)}
OBJECTS DETECTED (HIERARCHY):
{chr(10).join(tree_view)}


JSON SCHEMA:
Follow the StructuredObservation model.
"""

        try:
            # LLM call for semantic extraction.
            response = self.call_llm(
                prompt,
                EXTRACTOR_SYSTEM_PROMPT,
                seed=seed
            )

            # Grab the first {...} blob; fall back to parsing the whole reply.
            json_match = re.search(r'\{.*\}', response, re.DOTALL)
            data = json.loads(json_match.group(0)) if json_match else json.loads(response)

            # RAM values override whatever the LLM guessed.
            data["location_id"] = curr_id
            data["location_name"] = curr_name
            data["raw_ram_objects"] = objects_in_ram

            if not data.get("location_name") or data.get("location_name") == "Unknown":
                data["location_name"] = curr_name

            # Keep only translations whose RAM name exists and whose friendly
            # name actually appears in the game text (anti-hallucination).
            raw_data_mapping = data.get("name_translation", {})
            valid_mapping = {}
            lower_text = raw_text.lower()
            for tech_name, friendly_name in raw_data_mapping.items():
                if tech_name in objects_in_ram:
                    if friendly_name.lower() in lower_text:
                        valid_mapping[tech_name] = friendly_name
                    else:
                        pass

            parent_names = [item["name"] for item in detected_objs_raw]
            # Filter LLM lists against RAM truth to avoid hallucinations.
            data["takeable_objects"] = [o for o in data.get("takeable_objects", []) if o in parent_names]
            data["interactable_features"] = [o for o in data.get("interactable_features", []) if o in parent_names]
            data["objects_in_room"] = detected_objs_raw
            data["name_translation"] = valid_mapping

            # Auto-flag hidden objects the RAM marked as "(missing...)".
            secrets = [o for o in objects_in_ram if "(missing" in o.lower()]
            if secrets:
                if "puzzle_clues" not in data: data["puzzle_clues"] = []
                data["puzzle_clues"].append(f"RAM Alert: {len(secrets)} hidden object(s) detected. Search the area.")

            return data

        except Exception as e:
            print(f"[Extractor Error] {e}")

            # Fallback path: mechanically scan the text for direction words.
            manual_exits = []
            lower_text = raw_text.lower()
            for d in VALID_DIRECTIONS:
                # Match the direction as a whole word only.
                if re.search(rf"\b{d}\b", lower_text):
                    manual_exits.append(d)

            return {
                "location_id": curr_id,
                "location_name": curr_name,
                "description_summary": raw_text,
                "raw_ram_objects": objects_in_ram,
                "objects_in_room" : detected_objs_raw,
                "visible_exits": manual_exits,  # inject the manual scan
                "name_translation": {},
                "takeable_objects": [],
                "puzzle_clues": ["Erreur d'extraction LLM."]
            }
|
| 1740 |
+
|
| 1741 |
+
#### Utilitaires de sections
|
| 1742 |
+
|
| 1743 |
+
class SectionUtils:
    """
    Helpers for reading and writing '## <name>' markdown sections inside
    the strategic knowledge-base document.
    """

    @staticmethod
    def extract_section_content(content: str, section_name: str) -> str:
        """Return the stripped body of section ``section_name``, or '' if absent."""
        if not content:
            return ""
        # A section runs from its '## <name>' header up to the next '## '
        # header (or the end of the document).
        found = re.search(
            rf"## {re.escape(section_name)}(.*?)(?=\n## |$)", content, re.DOTALL
        )
        if found is None:
            return ""
        return found.group(1).strip()

    @staticmethod
    def update_section_content(content: str, section_name: str, new_content: str) -> str:
        """Replace the body of ``section_name``, appending the section if missing."""
        if not content:
            content = "# Zork Strategic Knowledge Base\n\n"
        header = f"## {section_name}"
        replacement = f"{header}\n\n{new_content}\n"
        found = re.search(
            rf"## {re.escape(section_name)}(.*?)(?=\n## |$)", content, re.DOTALL
        )
        if found is None:
            # Section does not exist yet: append it at the end of the document.
            return f"{content}\n\n{replacement}\n"
        # Swap only the first occurrence of the matched section text.
        return content.replace(found.group(0), replacement, 1)

    @staticmethod
    def extract_cross_episode_section(content: str) -> str:
        """Extract the 'Wisdom' section that must persist across game episodes."""
        return SectionUtils.extract_section_content(content, "CROSS-EPISODE INSIGHTS")
|
| 1772 |
+
|
| 1773 |
+
# Canonical movement tokens (both abbreviated and full forms) accepted by the
# mapper when validating exits extracted from game text or LLM output.
VALID_DIRECTIONS = {
    "n", "s", "e", "w", "ne", "nw", "se", "sw", "u", "d", "in", "out",
    "north", "south", "east", "west", "northeast", "northwest",
    "southeast", "southwest", "up", "down"
}
|
| 1778 |
+
|
| 1779 |
+
class WorldMapper:
    """
    Incremental room graph built from the structured observation extracted
    each turn. Tracks confirmed / potential / blocked exits, items, puzzles,
    and renders a textual "strategic map" block for the LLM prompt.
    """

    def __init__(self):
        # room_id (str) -> room record dict (name, exits, items, ...).
        self.graph = {}
        self.last_room_id = None
        self.last_direction = None

    def update_map(self, structured_data: dict, last_action: str, observation: str):
        """Register/refresh the current room and link it to the previous one.

        Args:
            structured_data: extraction result (location_id, location_name,
                visible_exits, puzzle_clues, takeable_objects, ...).
            last_action: the command that led to this observation.
            observation: raw game text for this turn.
        """
        curr_id = str(structured_data.get("location_id"))
        curr_name = structured_data.get("location_name")
        new_exits = structured_data.get("visible_exits", [])
        # Keep only tokens that are real movement directions (anti-hallucination).
        clean_directions = [d.lower() for d in new_exits if d.lower() in VALID_DIRECTIONS]
        # BUGFIX: typo "cleand" in debug output.
        print("cleaned directions", clean_directions)

        if curr_id not in self.graph:
            self.graph[curr_id] = {
                "name": curr_name,
                "exits": {},
                "blocked_exits": {},
                "potential_exits": clean_directions,
                "puzzles": structured_data.get("puzzle_clues", []),
                "items": structured_data.get("takeable_objects", []),
                "scenery": structured_data.get("interactable_features", []),
                "visited_count": 1,
                "items_scanned": False,
                "description": observation,
            }
        else:
            self.graph[curr_id]["visited_count"] += 1
            # Items reflect the current RAM truth; puzzles accumulate (deduped).
            self.graph[curr_id]["items"] = structured_data.get("takeable_objects", [])
            self.graph[curr_id]["puzzles"] = list(set(self.graph[curr_id]["puzzles"] + structured_data.get("puzzle_clues", [])))

            existing_potentials = set(self.graph[curr_id].get("potential_exits", []))
            existing_potentials.update(clean_directions)
            self.graph[curr_id]["potential_exits"] = list(existing_potentials)

        direction_taken = self._extract_direction(last_action)

        if self.last_room_id and self.last_room_id != curr_id and direction_taken:
            # Record the traversal from the previous room into this one.
            self.graph[self.last_room_id]["exits"][direction_taken.lower()] = curr_id

            opp = self._get_opposite(direction_taken.lower())
            if opp:
                # Assume the reverse direction immediately leads back.
                self.graph[curr_id]["exits"][opp] = self.last_room_id
                print(f"🗺️ Map Link: {self.graph[self.last_room_id]['name']} <({direction_taken})--({opp})> {curr_name}")

        self.last_room_id = curr_id

    def mark_as_scanned(self, room_id: str):
        """Called once the agent has examined every object in a room."""
        if room_id in self.graph:
            self.graph[room_id]["items_scanned"] = True

    def mark_blocked_exit(self, room_id: str, direction: str, reason: str):
        """Remember a direction that failed so it is not retried pointlessly."""
        room_id = str(room_id)
        if room_id in self.graph:
            if "blocked_exits" not in self.graph[room_id]:
                self.graph[room_id]["blocked_exits"] = {}
            # Store the direction with its failure reason (e.g. "locked", "too dark").
            self.graph[room_id]["blocked_exits"][direction.lower()] = reason

    def _extract_direction(self, action: str):
        """Return the canonical full-word direction in `action`, or None.

        Note: returns None when the action is not a movement command (the
        original `-> str` annotation was misleading).
        """
        action = action.lower().strip()

        # Full mapping turning every shorthand into its long form.
        mapping = {
            "n": "north",
            "s": "south",
            "e": "east",
            "w": "west",
            "u": "up",
            "d": "down",
            "ne": "northeast",
            "nw": "northwest",
            "se": "southeast",
            "sw": "southwest",
            "in": "inside",
            "out": "outside",
            "enter": "inside",
            "exit": "outside"
        }

        # 1. Exact shorthand match (e.g. "ne").
        if action in mapping:
            return mapping[action]

        # 2. Otherwise look for a full word inside the phrase (e.g. "go northeast").
        #    Longest names first so "northeast" is not matched as "north".
        full_directions = sorted(mapping.values(), key=len, reverse=True)
        for d in full_directions:
            if d in action:
                return d

        return None

    def get_navigation_guidance(self, current_id: str, visible_exits: list) -> list:
        """Annotate each visible exit as already-taken or new (to push exploration)."""
        current_id = str(current_id)
        room_data = self.graph.get(current_id, {})
        known_exits = room_data.get("exits", {})  # directions already traversed

        nav_lines = []
        for direction in visible_exits:
            dir_lower = direction.lower()
            if dir_lower in known_exits:
                target_id = known_exits[dir_lower]
                target_name = self.graph.get(target_id, {}).get("name", "Unknown")
                nav_lines.append(f"- {direction.upper()} : Already taken (leads to {target_name})")
            else:
                # Highlight unexplored directions to push the agent to explore.
                nav_lines.append(f"- {direction.upper()} : 🌟 [NEW PATHWAY - TRY THIS]")

        return nav_lines

    def _get_opposite(self, direction: str):
        """Return the opposite direction token, or None if unknown."""
        opposites = {
            "north": "south", "south": "north",
            "east": "west", "west": "east",
            "up": "down", "down": "up",
            "northeast": "southwest", "southwest": "northeast",
            "northwest": "southeast", "southeast": "northwest",
            "inside": "outside", "outside": "inside",
            "n": "s", "s": "n", "e": "w", "w": "e",
            "u": "d", "d": "u", "ne": "sw", "sw": "ne",
            "nw": "se", "se": "nw", "in": "out", "out": "in"
        }
        return opposites.get(direction)

    def generate_summary(self, current_id: str) -> str:
        """Build the full strategic map overview injected into the LLM prompt."""
        current_id = str(current_id)
        if current_id not in self.graph:
            return "### 🗺️ MAP: Position current unknown in strategic memory."

        # BUGFIX: header emoji was mojibake ("����️") in the original.
        summary = ["### 🗺️ STRATEGIC MAP & SPATIAL MEMORY:"]

        # 1. Immediate surroundings: confirmed exits with their scan status.
        # BUGFIX: the "**Current Location:**" line was appended twice; now once.
        summary.append(f"**Current Location:** {self.graph[current_id]['name']}")
        exits = self.graph[current_id].get("exits", {})
        if exits:
            for direction, target_id in exits.items():
                room = self.graph.get(target_id, {})
                name = room.get("name", "Unknown Area")
                # Visual marker showing whether that room is fully examined.
                status = "✅ Scanned" if room.get("items_scanned") else "🔎 Items left"
                summary.append(f"- {direction.upper()} -> {name} ({status})")
        else:
            summary.append("- No known exits explored yet from here.")

        # 2. Items known to exist in OTHER rooms (the current room's items
        #    already come from the live RAM observation).
        other_rooms_with_stuff = []
        for r_id, r_data in self.graph.items():
            if r_id != current_id:
                items = r_data.get("items", [])
                features = r_data.get("scenery", [])

                if items or features:
                    room_info = f"- In {r_data['name']}:"
                    if items: room_info += f" Items: [{', '.join(items)}]"
                    if features: room_info += f" Scenery: {', '.join(features)}"
                    other_rooms_with_stuff.append(room_info)

        if other_rooms_with_stuff:
            summary.append("\n**🌍 WORLD OBJECTS (Memory):**")
            summary.extend(other_rooms_with_stuff)

        # 3. Immediate navigation: merge potential / confirmed / blocked exits.
        # (Removed the dead `unsolved_puzzles` computation whose only consumer
        # was commented out.)
        potential = self.graph[current_id].get("potential_exits", [])
        known = self.graph[current_id].get("exits", {})
        blocked = self.graph[current_id].get("blocked_exits", {})

        summary.append("**Immediate Navigation:**")
        # Union everything so no candidate direction is missed.
        all_possible = set(potential) | set(known.keys()) | set(blocked.keys())

        if not all_possible:
            summary.append("- ⚠️ NO EXITS DETECTED IN SCAN.")
            summary.append("- **HYPOTHETICAL DIRECTIONS:** [NORTH, SOUTH, EAST, WEST,NORTHEAST,NORTHWEST, SOUTHEAST, SOUTHWEST , UP, DOWN]")
            summary.append("- **ADVICE:** Use 'LOOK' or 'SEARCH' to confirm exits before moving, otherwise you might hit a wall.")

        for d in all_possible:
            d_l = d.lower()
            if d_l in blocked:
                summary.append(f"- {d.upper()} : 🚫 BLOCKED ({blocked[d_l]})")
            elif d_l in known:
                target_name = self.graph.get(known[d_l], {}).get("name", "Unknown")
                summary.append(f"- {d.upper()} : Leads to {target_name} ✅")
            else:
                summary.append(f"- {d.upper()} : 🌟 [NEW - NEVER TESTED]")

        room_data = self.graph.get(current_id, {})

        # Blocked exits, listed with their failure reasons.
        if blocked:
            summary.append("\n**🚫 BLOCKED / FAILED DIRECTIONS:**")
            for d, reason in blocked.items():
                summary.append(f"- {d.upper()} : {reason}")

        # Free-form notes (e.g. one-way passages), if any were recorded.
        for note in room_data.get("notes", []):
            summary.append(f"- ⚠️ {note}")

        return "\n".join(summary)
|
| 2000 |
|
| 2001 |
|
| 2002 |
if __name__ == "__main__":
|
map_graph.py
ADDED
|
@@ -0,0 +1,119 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
map_graph.py
|
| 3 |
+
Gère la structure du graphe, la persistance JSON et les algorithmes spatiaux (BFS).
|
| 4 |
+
"""
|
| 5 |
+
import json
|
| 6 |
+
import os
|
| 7 |
+
from dataclasses import dataclass, field, asdict
|
| 8 |
+
from typing import Dict, List, Optional, Set, Tuple
|
| 9 |
+
from collections import deque
|
| 10 |
+
|
| 11 |
+
@dataclass
class Connection:
    """A directed edge in the map graph: one exit leading to another room."""
    to_id: int
    direction: str
    is_verified: bool = True  # True if we have actually traversed this passage
|
| 16 |
+
|
| 17 |
+
@dataclass
class RoomNode:
    """A room (node) in the world map graph."""
    id: int
    name: str
    # Connections: direction -> Connection object
    exits: Dict[str, Connection] = field(default_factory=dict)
    # Walls/blockages: direction -> number of failed attempts
    failed_exits: Dict[str, int] = field(default_factory=dict)
    visited_count: int = 1
|
| 26 |
+
|
| 27 |
+
class MapGraph:
    """
    Spatial knowledge graph of the game world: rooms (nodes) connected by
    directional exits (edges), with JSON persistence and BFS pathfinding.
    """

    def __init__(self):
        # room_id -> RoomNode
        self.rooms: Dict[int, RoomNode] = {}

        # Opposite-direction lookup used to infer the return edge.
        self.inverse_dirs = {
            "n": "s", "s": "n", "e": "w", "w": "e",
            "north": "south", "south": "north",
            "east": "west", "west": "east",
            "up": "down", "down": "up",
            "ne": "sw", "sw": "ne", "nw": "se", "se": "nw",
            "enter": "exit", "exit": "enter", "in": "out", "out": "in"
        }

    def add_or_update_room(self, room_id: int, name: str):
        """Create the node if new, otherwise bump its visit counter."""
        if room_id not in self.rooms:
            self.rooms[room_id] = RoomNode(id=room_id, name=name)
        else:
            self.rooms[room_id].visited_count += 1
            # Keep the longest name seen (usually the most descriptive).
            if len(name) > len(self.rooms[room_id].name):
                self.rooms[room_id].name = name

    def add_connection(self, from_id: int, to_id: int, direction: str):
        """Add a verified directed edge, plus an inferred reverse edge."""
        if from_id not in self.rooms or to_id not in self.rooms:
            return

        # 1. Direct connection (verified by actual traversal).
        self.rooms[from_id].exits[direction] = Connection(to_id=to_id, direction=direction, is_verified=True)

        # 2. A previously recorded failure in this direction is now stale (pruning).
        if direction in self.rooms[from_id].failed_exits:
            del self.rooms[from_id].failed_exits[direction]

        # 3. Inferred (unverified) reverse connection: going north is assumed
        #    to mean south leads back.
        rev_dir = self.inverse_dirs.get(direction)
        if rev_dir:
            target_room = self.rooms[to_id]
            # Never overwrite an exit we already know about.
            if rev_dir not in target_room.exits:
                target_room.exits[rev_dir] = Connection(to_id=from_id, direction=rev_dir, is_verified=False)

    def record_failure(self, room_id: int, direction: str):
        """Count a wall/blocked attempt in the given direction."""
        if room_id in self.rooms:
            self.rooms[room_id].failed_exits[direction] = self.rooms[room_id].failed_exits.get(direction, 0) + 1

    def get_shortest_path(self, start_id: int, target_id: int) -> Optional[List[str]]:
        """BFS over exits; returns the direction sequence, or None if unreachable."""
        queue = deque([(start_id, [])])
        visited = {start_id}

        while queue:
            curr, path = queue.popleft()
            if curr == target_id:
                return path

            if curr in self.rooms:
                for d, conn in self.rooms[curr].exits.items():
                    if conn.to_id not in visited:
                        visited.add(conn.to_id)
                        queue.append((conn.to_id, path + [d]))
        return None

    def get_hubs(self) -> List[str]:
        """Names of important rooms (hubs) having more than 3 exits."""
        return [r.name for r in self.rooms.values() if len(r.exits) > 3]

    # --- JSON persistence ---
    def save(self, filepath: str):
        """Serialize the whole graph (nested dataclasses included) to JSON."""
        data = {
            str(rid): asdict(node) for rid, node in self.rooms.items()
        }
        with open(filepath, 'w') as f:
            json.dump(data, f, indent=2)

    def load(self, filepath: str):
        """Restore the graph from JSON; tolerates missing or partial files.

        ROBUSTNESS FIX: uses .get(...) with defaults for 'visited_count',
        'failed_exits' and 'exits' so older/partial save files still load
        instead of aborting the whole restore on the first KeyError.
        """
        if not os.path.exists(filepath):
            return
        try:
            with open(filepath, 'r') as f:
                data = json.load(f)
            self.rooms = {}
            for rid, rdata in data.items():
                node = RoomNode(
                    id=int(rid),
                    name=rdata['name'],
                    visited_count=rdata.get('visited_count', 1),
                )
                node.failed_exits = rdata.get('failed_exits', {})
                # Rebuild nested Connection objects.
                node.exits = {d: Connection(**c) for d, c in rdata.get('exits', {}).items()}
                self.rooms[int(rid)] = node
        except Exception as e:
            # Best effort: a corrupt map file must not crash the agent.
            print(f"Error loading map: {e}")
|
mcp_server.py
CHANGED
|
@@ -1,27 +1,8 @@
|
|
| 1 |
"""
|
| 2 |
-
|
| 3 |
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
Required tool:
|
| 8 |
-
play_action(action: str) -> str
|
| 9 |
-
Execute a game command and return the result.
|
| 10 |
-
|
| 11 |
-
Recommended tools:
|
| 12 |
-
memory() -> str
|
| 13 |
-
Return current game state, score, and recent history.
|
| 14 |
-
|
| 15 |
-
inventory() -> str
|
| 16 |
-
Return the player's current inventory.
|
| 17 |
-
|
| 18 |
-
get_map() -> str
|
| 19 |
-
Return a map of explored locations.
|
| 20 |
-
|
| 21 |
-
Test your server with:
|
| 22 |
-
fastmcp dev submission_template/mcp_server.py
|
| 23 |
-
|
| 24 |
-
Then open the MCP Inspector in your browser to test the tools interactively.
|
| 25 |
"""
|
| 26 |
|
| 27 |
import sys
|
|
@@ -31,179 +12,669 @@ import os
|
|
| 31 |
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
| 32 |
|
| 33 |
from fastmcp import FastMCP
|
| 34 |
-
from games.zork_env import TextAdventureEnv
|
| 35 |
-
|
| 36 |
|
| 37 |
-
|
| 38 |
-
# Create the MCP Server
|
| 39 |
-
# =============================================================================
|
| 40 |
|
| 41 |
-
|
|
|
|
| 42 |
|
|
|
|
|
|
|
| 43 |
|
| 44 |
-
# =============================================================================
|
| 45 |
-
# Game State Management
|
| 46 |
-
# =============================================================================
|
| 47 |
|
| 48 |
-
class
|
| 49 |
-
"""
|
| 50 |
-
Manages the text adventure game state.
|
| 51 |
-
|
| 52 |
-
TODO: Extend this class to track:
|
| 53 |
-
- Action history (for memory tool)
|
| 54 |
-
- Explored locations (for mapping)
|
| 55 |
-
- Current score and moves
|
| 56 |
-
"""
|
| 57 |
|
| 58 |
-
def __init__(self):
|
| 59 |
-
self.env: TextAdventureEnv = None
|
| 60 |
-
self.state = None
|
| 61 |
-
self.game_name: str = ""
|
| 62 |
-
# TODO: Add more state tracking
|
| 63 |
-
# self.history: list[tuple[str, str]] = []
|
| 64 |
-
# self.explored_locations: dict[str, set[str]] = {}
|
| 65 |
-
# self.current_location: str = ""
|
| 66 |
-
|
| 67 |
-
def initialize(self, game: str = "zork1"):
|
| 68 |
-
"""Initialize or reset the game."""
|
| 69 |
self.game_name = game
|
| 70 |
self.env = TextAdventureEnv(game)
|
| 71 |
self.state = self.env.reset()
|
| 72 |
-
|
| 73 |
-
|
|
|
|
| 74 |
|
| 75 |
-
def
|
| 76 |
-
"""
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 80 |
self.state = self.env.step(action)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 81 |
|
| 82 |
-
#
|
| 83 |
-
#
|
| 84 |
-
|
| 85 |
|
| 86 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 87 |
|
| 88 |
-
def
|
| 89 |
-
"""Get
|
| 90 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 91 |
|
| 92 |
-
def
|
| 93 |
-
"""Get
|
| 94 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 95 |
|
| 96 |
|
| 97 |
-
# Global game
|
| 98 |
-
|
| 99 |
|
| 100 |
|
| 101 |
-
def get_game() ->
|
| 102 |
-
"""Get or initialize the game
|
| 103 |
-
global
|
| 104 |
-
if
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
_game.initialize(game)
|
| 108 |
-
return _game
|
| 109 |
|
| 110 |
|
| 111 |
# =============================================================================
|
| 112 |
-
# MCP Tools
|
| 113 |
# =============================================================================
|
| 114 |
|
| 115 |
@mcp.tool()
|
| 116 |
def play_action(action: str) -> str:
|
| 117 |
"""
|
| 118 |
-
Execute a game
|
| 119 |
-
|
| 120 |
-
This is the main tool for interacting with the game.
|
| 121 |
|
| 122 |
Args:
|
| 123 |
-
action: The command to execute (e.g.,
|
| 124 |
-
|
| 125 |
Returns:
|
| 126 |
-
The game's response to
|
| 127 |
-
|
| 128 |
-
Valid commands include:
|
| 129 |
-
- Movement: north, south, east, west, up, down, enter, exit
|
| 130 |
-
- Objects: take <item>, drop <item>, open <thing>, examine <thing>
|
| 131 |
-
- Other: look, inventory, read <thing>, turn on lamp
|
| 132 |
"""
|
| 133 |
game = get_game()
|
|
|
|
| 134 |
|
| 135 |
-
#
|
| 136 |
-
|
| 137 |
|
| 138 |
-
|
|
|
|
| 139 |
|
| 140 |
-
|
| 141 |
-
|
|
|
|
| 142 |
|
| 143 |
-
return result
|
| 144 |
|
| 145 |
|
| 146 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 147 |
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
# # TODO: Return useful state information
|
| 158 |
-
# pass
|
| 159 |
|
| 160 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 161 |
# @mcp.tool()
|
| 162 |
-
# def
|
| 163 |
-
# """
|
| 164 |
-
# Check what the player is carrying.
|
| 165 |
-
#
|
| 166 |
-
# Returns:
|
| 167 |
-
# List of items in the player's inventory
|
| 168 |
-
# """
|
| 169 |
# game = get_game()
|
| 170 |
-
#
|
| 171 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 172 |
|
| 173 |
|
| 174 |
# @mcp.tool()
|
| 175 |
-
# def
|
| 176 |
# """
|
| 177 |
-
#
|
| 178 |
-
#
|
| 179 |
-
# Returns:
|
| 180 |
-
# A text representation of explored locations and connections
|
| 181 |
# """
|
| 182 |
-
#
|
| 183 |
-
|
| 184 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 185 |
|
| 186 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 187 |
# @mcp.tool()
|
| 188 |
-
# def
|
| 189 |
# """
|
| 190 |
-
#
|
| 191 |
-
#
|
| 192 |
-
# Returns:
|
| 193 |
-
# List of actions that might work here
|
| 194 |
# """
|
| 195 |
-
#
|
| 196 |
-
#
|
| 197 |
-
#
|
| 198 |
-
|
| 199 |
-
#
|
| 200 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 201 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 202 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 203 |
# =============================================================================
|
| 204 |
-
#
|
| 205 |
# =============================================================================
|
| 206 |
|
| 207 |
if __name__ == "__main__":
|
| 208 |
-
# This runs the server with stdio transport (for MCP clients)
|
| 209 |
mcp.run()
|
|
|
|
| 1 |
"""
|
| 2 |
+
Example: MCP Server for Text Adventures
|
| 3 |
|
| 4 |
+
A complete MCP server that exposes text adventure games via tools.
|
| 5 |
+
This demonstrates a full-featured server with memory, mapping, and inventory.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
"""
|
| 7 |
|
| 8 |
import sys
|
|
|
|
| 12 |
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
| 13 |
|
| 14 |
from fastmcp import FastMCP
|
| 15 |
+
from games.zork_env import TextAdventureEnv, list_available_games
|
|
|
|
| 16 |
|
| 17 |
+
import asyncio
|
|
|
|
|
|
|
| 18 |
|
| 19 |
+
# Get game from environment variable (default: zork1)
|
| 20 |
+
INITIAL_GAME = os.environ.get("GAME", "zork1")
|
| 21 |
|
| 22 |
+
# Create the MCP server
|
| 23 |
+
mcp = FastMCP("Text Adventure Server")
|
| 24 |
|
|
|
|
|
|
|
|
|
|
| 25 |
|
| 26 |
+
class GameState:
|
| 27 |
+
"""Manages the text adventure game state and exploration data."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
|
| 29 |
+
    def __init__(self, game: str = "zork1"):
        """Start the given game and reset all tracking state.

        Args:
            game: game identifier passed to the environment (e.g. "zork1").
        """
        self.game_name = game
        # Project-defined game environment wrapper (TextAdventureEnv).
        self.env = TextAdventureEnv(game)
        self.state = self.env.reset()
        # (action, observation) pairs; capped at 50 entries in take_action().
        self.history: list[tuple[str, str]] = []
        # location name -> set of "direction -> destination" edge strings.
        self.explored_locations: dict[str, set[str]] = {}
        self.current_location: str = self._extract_location(self.state.observation)
|
| 36 |
|
| 37 |
+
def _extract_location(self, observation: str) -> str:
|
| 38 |
+
"""Extract location name from observation (usually first line)."""
|
| 39 |
+
lines = observation.strip().split('\n')
|
| 40 |
+
return lines[0] if lines else "Unknown"
|
| 41 |
+
|
| 42 |
+
def clean_text(self, text: str) -> str:
|
| 43 |
+
"""Transforme 'obj91: pants parent87...' en 'pants'."""
|
| 44 |
+
text = str(text).lower()
|
| 45 |
+
if ":" in text:
|
| 46 |
+
# On prend la partie après le matricule obj
|
| 47 |
+
name_part = text.split(":", 1)[1].strip()
|
| 48 |
+
# On coupe avant les métadonnées techniques
|
| 49 |
+
for stop_word in [" parent", " sibling", " child", " attributes"]:
|
| 50 |
+
if stop_word in name_part:
|
| 51 |
+
name_part = name_part.split(stop_word)[0].strip()
|
| 52 |
+
return name_part
|
| 53 |
+
return text
|
| 54 |
+
|
| 55 |
+
def take_action(self, action: str) -> str:
|
| 56 |
+
"""Execute a game action and return the result."""
|
| 57 |
self.state = self.env.step(action)
|
| 58 |
+
result = self.state.observation
|
| 59 |
+
|
| 60 |
+
# Track history
|
| 61 |
+
self.history.append((action, result))
|
| 62 |
+
if len(self.history) > 50:
|
| 63 |
+
self.history = self.history[-50:]
|
| 64 |
+
|
| 65 |
+
# Update map
|
| 66 |
+
new_location = self._extract_location(result)
|
| 67 |
+
if action in ["north", "south", "east", "west", "up", "down",
|
| 68 |
+
"enter", "exit", "n", "s", "e", "w", "u", "d"]:
|
| 69 |
+
if self.current_location not in self.explored_locations:
|
| 70 |
+
self.explored_locations[self.current_location] = set()
|
| 71 |
+
if new_location != self.current_location:
|
| 72 |
+
self.explored_locations[self.current_location].add(f"{action} -> {new_location}")
|
| 73 |
+
self.current_location = new_location
|
| 74 |
+
|
| 75 |
+
return result
|
| 76 |
+
|
| 77 |
+
# def get_heuristic_actions(self) -> str:
|
| 78 |
+
# """
|
| 79 |
+
# Génère des actions ultra-précises en fusionnant l'observation textuelle
|
| 80 |
+
# et le scan de la mémoire vive du jeu (God Mode).
|
| 81 |
+
# """
|
| 82 |
+
|
| 83 |
+
# self._debug_log("HEURISTIC - STRUCTURED")
|
| 84 |
+
|
| 85 |
+
# # visible_objs = structured_data.get("takeable_objects", [])
|
| 86 |
+
# # puzzles = structured_data.get("puzzle_clues", [])
|
| 87 |
+
# # exits = structured_data.get("visible_exits", [])
|
| 88 |
+
|
| 89 |
+
# categorized_actions: Dict[str, Set[str]] = {
|
| 90 |
+
# "navigation": {"north", "south", "east", "west","northeast", "northwest", "southeast", "southwest","up", "down", "in", "out"},
|
| 91 |
+
# "system": {"look", "inventory", "wait"},
|
| 92 |
+
# "inventory_interactions": set(),
|
| 93 |
+
# "environment_interactions": set(),
|
| 94 |
+
# "contextual": set()
|
| 95 |
+
# }
|
| 96 |
+
|
| 97 |
+
# BASE_ACTIONS = {
|
| 98 |
+
# "look",
|
| 99 |
+
# "inventory",
|
| 100 |
+
# "wait"
|
| 101 |
+
# }
|
| 102 |
+
|
| 103 |
+
# DIRECTIONS = {
|
| 104 |
+
# "north", "south", "east", "west","northwest","northeast","southeast","southwest"
|
| 105 |
+
# "up", "down", "in", "out",
|
| 106 |
+
# "n", "s", "e", "w", "u", "d","nw","ne","se","sw"
|
| 107 |
+
# }
|
| 108 |
+
|
| 109 |
+
# INVENTORY_VERBS = {
|
| 110 |
+
# "examine",
|
| 111 |
+
# "use",
|
| 112 |
+
# "drop",
|
| 113 |
+
# }
|
| 114 |
+
|
| 115 |
+
# ROOM_VERBS = {
|
| 116 |
+
# "examine",
|
| 117 |
+
# "open",
|
| 118 |
+
# "close",
|
| 119 |
+
# "push",
|
| 120 |
+
# "pull",
|
| 121 |
+
# "take",
|
| 122 |
+
# }
|
| 123 |
+
|
| 124 |
+
# inventory_items = [i['name'] for i in self.state.inventory_raw] # Liste de dicts {'name':...}
|
| 125 |
+
# room_items = structured_data.get("raw_ram_objects", [])
|
| 126 |
+
|
| 127 |
+
# available_verbs = ["examine", "take", "drop", "look", "inventory", "wait"]
|
| 128 |
+
|
| 129 |
+
# actions: Set[str] = set()
|
| 130 |
+
|
| 131 |
+
# obs = self.state.observation.lower()
|
| 132 |
+
|
| 133 |
+
# clean_inventory = [self.clean_text(i) for i in self.state.inventory]
|
| 134 |
+
|
| 135 |
+
# for item in clean_inventory:
|
| 136 |
+
# for verb in INVENTORY_VERBS:
|
| 137 |
+
# categorized_actions["inventory_interactions"].add(f"{verb} {item}")
|
| 138 |
+
|
| 139 |
+
# if "door" in obs:
|
| 140 |
+
# categorized_actions["environment_interactions"] |= {"open door", "close door", "examine door"}
|
| 141 |
+
|
| 142 |
+
# if "window" in obs:
|
| 143 |
+
# categorized_actions["environment_interactions"] |= {"open window", "examine window"}
|
| 144 |
+
|
| 145 |
+
# if "stairs" in obs or "staircase" in obs:
|
| 146 |
+
# actions |= {"up", "down"}
|
| 147 |
+
# categorized_actions["contextual"] |= {"up stairs", "down stairs"}
|
| 148 |
+
|
| 149 |
+
# if "dark" in obs or "can't see" in obs:
|
| 150 |
+
# for item in clean_inventory:
|
| 151 |
+
# if any(k in item for k in ["lamp", "torch", "light"]):
|
| 152 |
+
# categorized_actions["inventory_interactions"].add(f"use {item}")
|
| 153 |
+
# categorized_actions["inventory_interactions"].add(f"turn on {item}")
|
| 154 |
+
|
| 155 |
+
# if any(k in obs for k in ["man", "woman", "person", "creature"]):
|
| 156 |
+
# categorized_actions["contextual"] |= {"talk", "listen", "attack"}
|
| 157 |
+
|
| 158 |
+
# if any(k in obs for k in ["noise", "sound", "voice", "squeal"]):
|
| 159 |
+
# categorized_actions["contextual"].add("listen")
|
| 160 |
+
# categorized_actions["contextual"].add("examine noise")
|
| 161 |
+
|
| 162 |
+
# # --- 7. Anti-boucle simple ---
|
| 163 |
+
# if self.history:
|
| 164 |
+
# last_action = self.history[-1]
|
| 165 |
+
# if last_action in actions:
|
| 166 |
+
# actions.remove(last_action)
|
| 167 |
+
|
| 168 |
+
|
| 169 |
+
# final_output = []
|
| 170 |
+
# for category, acts in categorized_actions.items():
|
| 171 |
+
# if acts:
|
| 172 |
+
# # On trie et on formate chaque catégorie
|
| 173 |
+
# sorted_acts = sorted(list(acts))
|
| 174 |
+
# category_text = f"\n{category.replace('_', ' ').upper()}:\n"
|
| 175 |
+
# category_text += "\n".join([f" - {a}" for a in sorted_acts])
|
| 176 |
+
# final_output.append(category_text)
|
| 177 |
+
|
| 178 |
+
# return "\n".join(final_output)
|
| 179 |
+
|
| 180 |
+
def get_heuristic_actions(self, structured_data: dict) -> str:
|
| 181 |
+
"""
|
| 182 |
+
Fournit au LLM les briques nécessaires pour construire ses propres actions
|
| 183 |
+
en se basant sur la vérité de la RAM.
|
| 184 |
+
"""
|
| 185 |
+
self._debug_log("HEURISTIC - COMPONENT BASED")
|
| 186 |
+
|
| 187 |
+
# 1. Récupération des objets réels (RAM + Inventaire)
|
| 188 |
+
# On utilise les noms exacts venant de la RAM pour éviter les erreurs
|
| 189 |
+
inventory_items = [i['name'] for i in self.state.inventory_raw] # Liste de dicts {'name':...}
|
| 190 |
+
room_items = structured_data.get("raw_ram_objects", [])
|
| 191 |
|
| 192 |
+
# 2. Définition des Verbes (Le "Lexique")
|
| 193 |
+
# On ne donne que les verbes pertinents à la situation
|
| 194 |
+
available_verbs = ["examine", "take", "drop", "look", "inventory", "wait"]
|
| 195 |
|
| 196 |
+
obs = self.state.observation.lower()
|
| 197 |
+
|
| 198 |
+
if any(k in obs for k in ["door", "window", "container", "box"]):
|
| 199 |
+
available_verbs += ["open", "close", "unlock"]
|
| 200 |
+
|
| 201 |
+
if any(k in obs for k in ["noise", "sound", "squeal", "quiet"]):
|
| 202 |
+
available_verbs.append("listen")
|
| 203 |
+
|
| 204 |
+
if "dark" in obs or "torch" in str(inventory_items):
|
| 205 |
+
available_verbs += ["turn on", "light"]
|
| 206 |
+
|
| 207 |
+
# 3. Construction du message d'aide
|
| 208 |
+
final_output = [
|
| 209 |
+
"### ACTION CONSTRUCTION KIT",
|
| 210 |
+
f"VERBS: {', '.join(sorted(set(available_verbs)))}",
|
| 211 |
+
f"INVENTORY: {', '.join(inventory_items) if inventory_items else 'Empty'}",
|
| 212 |
+
f"ROOM OBJECTS: {', '.join(room_items) if room_items else 'None visible'}",
|
| 213 |
+
"\nNAVIGATION: n, s, e, w, ne, nw, se, sw, u, d, in, out",
|
| 214 |
+
"\nEXAMPLES: 'examine tracks', 'turn on torch', 'open mailbox', 'listen'"
|
| 215 |
+
]
|
| 216 |
+
|
| 217 |
+
return "\n".join(final_output)
|
| 218 |
+
|
| 219 |
+
|
| 220 |
+
def _debug_log(self, message: str):
|
| 221 |
+
"""Envoie un log de debug vers stderr pour ne pas polluer le flux MCP."""
|
| 222 |
+
print(f"DEBUG_HEURISTIC: {message}", file=sys.stderr, flush=True)
|
| 223 |
+
|
| 224 |
+
def get_memory(self) -> str:
|
| 225 |
+
"""Get a summary of current game state."""
|
| 226 |
+
recent = self.history[-5:] if self.history else []
|
| 227 |
+
recent_str = "\n".join([f" > {a} -> {r[:60]}..." for a, r in recent]) if recent else " (none yet)"
|
| 228 |
+
|
| 229 |
+
return f"""Current State:
|
| 230 |
+
- Location: {self.current_location}
|
| 231 |
+
- Score: {self.state.score} points
|
| 232 |
+
- Moves: {self.state.moves}
|
| 233 |
+
- Game: {self.game_name}
|
| 234 |
+
|
| 235 |
+
Recent Actions:
|
| 236 |
+
{recent_str}
|
| 237 |
+
|
| 238 |
+
Current Observation:
|
| 239 |
+
{self.state.observation}"""
|
| 240 |
|
| 241 |
+
def get_map(self) -> str:
|
| 242 |
+
"""Get a map of explored locations."""
|
| 243 |
+
if not self.explored_locations:
|
| 244 |
+
return "Map: No locations explored yet. Try moving around!"
|
| 245 |
+
|
| 246 |
+
lines = ["Explored Locations and Exits:"]
|
| 247 |
+
for loc, exits in sorted(self.explored_locations.items()):
|
| 248 |
+
lines.append(f"\n* {loc}")
|
| 249 |
+
for exit_info in sorted(exits):
|
| 250 |
+
lines.append(f" -> {exit_info}")
|
| 251 |
+
|
| 252 |
+
lines.append(f"\n[Current] {self.current_location}")
|
| 253 |
+
return "\n".join(lines)
|
| 254 |
|
| 255 |
+
def get_inventory(self) -> str:
|
| 256 |
+
"""Get current inventory."""
|
| 257 |
+
items = self.state.inventory if hasattr(self.state, 'inventory') and self.state.inventory else []
|
| 258 |
+
|
| 259 |
+
if not items:
|
| 260 |
+
return "Inventory: You are empty-handed."
|
| 261 |
+
|
| 262 |
+
item_names = []
|
| 263 |
+
for item in items:
|
| 264 |
+
item_str = str(item)
|
| 265 |
+
item_lower = item_str.lower()
|
| 266 |
+
if "parent" in item_lower:
|
| 267 |
+
idx = item_lower.index("parent")
|
| 268 |
+
name = item_str[:idx].strip()
|
| 269 |
+
if ":" in name:
|
| 270 |
+
name = name.split(":", 1)[1].strip()
|
| 271 |
+
item_names.append(name)
|
| 272 |
+
elif ":" in item_str:
|
| 273 |
+
name = item_str.split(":")[1].strip()
|
| 274 |
+
item_names.append(name)
|
| 275 |
+
else:
|
| 276 |
+
item_names.append(item_str)
|
| 277 |
+
|
| 278 |
+
return f"Inventory: {', '.join(item_names)}"
|
| 279 |
|
| 280 |
|
| 281 |
+
# Global game state
_game_state: GameState | None = None


def get_game() -> GameState:
    """Get or initialize the game state (lazy singleton)."""
    global _game_state
    if _game_state is None:
        # Boot the environment on the first tool call only.
        _game_state = GameState(INITIAL_GAME)
    return _game_state
|
|
|
|
|
|
|
| 291 |
|
| 292 |
|
| 293 |
# =============================================================================
|
| 294 |
+
# MCP Tools
|
| 295 |
# =============================================================================
|
| 296 |
|
| 297 |
@mcp.tool()
def play_action(action: str) -> str:
    """
    Execute a game action in the text adventure.

    Args:
        action: The command to execute (e.g., 'north', 'take lamp', 'open mailbox')

    Returns:
        The game's response to your action
    """
    game = get_game()
    result = game.take_action(action)

    # Celebrate fresh points; otherwise just show the running totals.
    if game.state.reward > 0:
        score_info = f"\n\n+{game.state.reward} points! (Total: {game.state.score})"
    else:
        score_info = f"\n\n[Score: {game.state.score} | Moves: {game.state.moves}]"

    done_info = "\n\nGAME OVER" if game.state.done else ""
    return result + score_info + done_info
|
| 322 |
|
| 323 |
|
| 324 |
+
@mcp.tool()
def memory() -> str:
    """
    Get a summary of the current game state.

    Returns location, score, moves, recent actions, and current observation.
    """
    game = get_game()
    return game.get_memory()
|
| 332 |
|
| 333 |
+
|
| 334 |
+
@mcp.tool()
def get_map() -> str:
    """
    Get a map showing explored locations and connections.

    Useful for navigation and avoiding getting lost.
    """
    game = get_game()
    return game.get_map()
|
|
|
|
|
|
|
| 342 |
|
| 343 |
|
| 344 |
+
@mcp.tool()
def inventory() -> str:
    """
    Check what items you are currently carrying.
    """
    game = get_game()
    return game.get_inventory()
|
| 350 |
+
|
| 351 |
+
# Dans mcp_server.py
|
| 352 |
+
|
| 353 |
# @mcp.tool()
|
| 354 |
+
# def get_valid_actions_cheat() -> str:
|
| 355 |
+
# """[GOD MODE] Liste des actions syntaxiques via Spacy/Jericho."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 356 |
# game = get_game()
|
| 357 |
+
# # On appelle la méthode de TON wrapper TextAdventureEnv
|
| 358 |
+
# actions = game.env.get_valid_actions()
|
| 359 |
+
# return "ACTIONS VALIDES :\n" + ", ".join(actions)
|
| 360 |
+
|
| 361 |
+
|
| 362 |
+
@mcp.tool()
async def get_valid_actions_cheat() -> str:
    """
    [GOD MODE] Returns the list of ALL valid actions currently possible.
    """
    try:
        jericho_env = get_game().env.env
        # Jericho's action detection is synchronous; run it inline without
        # spawning parallel workers.
        valid_actions = jericho_env.get_valid_actions(use_parallel=False)
        if valid_actions:
            return "VALID ACTIONS:\n" + ", ".join(valid_actions)
        return "Jericho returned an empty list of actions."
    except Exception as e:
        return f"Error retrieving valid actions: {str(e)}"
|
| 382 |
+
|
| 383 |
|
| 384 |
|
| 385 |
# @mcp.tool()
|
| 386 |
+
# def get_valid_actions_cheat() -> str:
|
| 387 |
# """
|
| 388 |
+
# [GOD MODE] Returns the absolute list of logical actions
|
| 389 |
+
# by scanning the game's internal memory tree.
|
|
|
|
|
|
|
| 390 |
# """
|
| 391 |
+
# return get_game().get_heuristic_actions()
|
| 392 |
+
|
| 393 |
+
@mcp.tool()
def detect_objects_cheat() -> str:
    """
    [GOD MODE] Scans the game memory to find interactive objects in the current location.
    Returns their internal game names.
    """
    try:
        # BUG FIX: the original referenced a module-level `env` that is never
        # defined, so every call raised NameError and fell into the except
        # branch. Resolve the Jericho environment through the shared game
        # state, as the other tools do.
        env = get_game().env.env
        # identify_interactive_objects() is not available for every game, but
        # usually works for Zork/LostPig-style titles.
        objs = env.identify_interactive_objects()
        return f"OBJECTS IN MEMORY: {objs}"
    except Exception as e:
        return f"Feature not available: {e}"
|
| 406 |
+
|
| 407 |
+
|
| 408 |
+
@mcp.tool()
def cheat_sense_surroundings() -> str:
    """
    [GOD MODE] List the objects that the game RAM places in the player's
    current room, with their internal object ids.
    """
    try:
        game = get_game()
        j_env = game.env.env

        # BUG FIX: the room id was hard-coded to 164 (a debugging leftover,
        # with the real lookup commented out). Read the player's actual
        # location; Jericho may return either an object or a bare int, so
        # normalise to the numeric id.
        loc = j_env.get_player_location()
        player_loc = loc.num if hasattr(loc, 'num') else int(loc)

        # Walk the whole object table (often several hundred entries) and
        # keep those whose parent is the current room.
        world_objs = j_env.get_world_objects()
        contents = []
        for obj in world_objs:
            if obj.parent == player_loc:
                name = str(obj.name).replace('\x00', '').strip()
                # Skip the player object itself and unnamed entries.
                if name and "player" not in name.lower():
                    contents.append(f"{name} (ID:{obj.num})")

        return "VISIBLE IN MEMORY: " + (", ".join(contents) if contents else "Empty Room")
    except Exception as e:
        return f"Error: {e}"
|
| 431 |
+
|
| 432 |
+
@mcp.tool()
def get_game_dictionary() -> list[str]:
    """Return every word the game parser understands."""
    # get_dictionary() yields DictionaryWord objects; unwrap to plain strings.
    dictionary = get_game().env.env.get_dictionary()
    return [entry.word for entry in dictionary]
|
| 439 |
+
|
| 440 |
+
@mcp.tool()
def get_action_grammar() -> str:
    """Return the sentence templates the game accepts (e.g. 'put X on Y')."""
    bindings = get_game().env.env.bindings
    return bindings.get('grammar', 'Non disponible')
|
| 445 |
+
|
| 446 |
+
|
| 447 |
+
@mcp.tool()
def cheat_get_valid_exits() -> str:
    """[GOD MODE] Probe the current room's properties to find its exits."""
    try:
        game = get_game()
        j_env = game.env.env

        # BUG FIX: the room id was hard-coded to 166 (debugging leftover,
        # real lookup commented out). Use the player's actual location and
        # normalise object-or-int returns to the numeric id.
        loc = j_env.get_player_location()
        room_id = loc.num if hasattr(loc, 'num') else int(loc)

        directions = ['north', 'south', 'east', 'west', 'up', 'down', 'ne', 'nw', 'se', 'sw', 'in', 'out']
        valid_exits = []

        # Primary path: ask the engine for the destination of each direction.
        for d in directions:
            try:
                dest_id = j_env.get_next_location(room_id, d)
                if dest_id > 0:
                    valid_exits.append(d)
            except Exception:
                # Direction probing is unsupported for some games; skip it.
                continue

        # FALLBACK: if the engine exposes nothing, scan the textual
        # observation for classic patterns like "Exit: North" / "to the south".
        if not valid_exits:
            obs = game.state.observation.lower()
            for d in directions:
                if f" {d}" in obs and ("exit" in obs or "lead" in obs or "way" in obs):
                    valid_exits.append(d)

        return f"STRICT VALID EXITS: {', '.join(valid_exits) if valid_exits else 'None (Try exploring manually)'}"
    except Exception as e:
        return f"Error: {e}"
|
| 482 |
+
|
| 483 |
+
@mcp.tool()
def cheat_get_status() -> str:
    """
    [GOD MODE] Returns the internal state: Score, Moves, and Inventory.
    """
    try:
        # BUG FIX: the original referenced an undefined module-level `env`,
        # so the tool always returned a NameError message. Resolve the
        # Jericho environment through the shared game state.
        env = get_game().env.env
        score = env.get_score()
        # get_inventory() returns Jericho objects; keep only their names.
        inv_objs = env.get_inventory()
        inv_names = [obj.name for obj in inv_objs]

        return f"""
SCORE: {score}
INVENTORY (INTERNAL): {inv_names}
"""
    except Exception as e:
        return str(e)
|
| 500 |
|
| 501 |
|
| 502 |
+
@mcp.tool()
def get_location_info() -> dict:
    """
    Scan the current room and the immediate contents of its objects (depth +1).
    Lets the LLM infer object types from names and containment structure.

    Returns:
        On success: {"status", "location", "detected_objects", "inventory",
        "world_hash"}; on failure: {"status": "error", "message": ...}.
    """
    try:
        game = get_game()
        env = game.env.env

        def clean_name(name):
            # Jericho names can carry NUL padding; strip it.
            return str(name).replace('\x00', '').strip() if name else "Inconnu"

        # 1. Root location (normalise object-or-int returns to an id).
        loc = env.get_player_location()
        room_id = loc.num if hasattr(loc, 'num') else int(loc)
        room_obj = env.get_object(room_id)

        world_objs = env.get_world_objects()

        # PERF: index children by parent once instead of rescanning the whole
        # object table for every room object (was O(n^2)). Insertion order
        # preserves the engine's object order.
        children = {}
        for obj in world_objs:
            children.setdefault(obj.parent, []).append(obj)

        # 2. Collect the room's objects plus one level of nested contents.
        detected_elements = []
        for obj in children.get(room_id, []):
            name = clean_name(obj.name)
            if "player" in name.lower():
                continue

            sub_contents = [
                {"id": sub.num, "name": clean_name(sub.name)}
                for sub in children.get(obj.num, [])
            ]

            detected_elements.append({
                "id": obj.num,
                "name": name,
                "contains_count": len(sub_contents),
                "contents": sub_contents,  # immediate children only
            })

        return {
            "status": "success",
            "location": {
                "id": room_id,
                "name": clean_name(room_obj.name),
            },
            "detected_objects": detected_elements,
            "inventory": [
                {"id": i.num, "name": clean_name(i.name)}
                for i in env.get_inventory()
            ],
            "world_hash": env.get_world_state_hash(),
        }

    except Exception as e:
        return {"status": "error", "message": str(e)}
|
| 564 |
# @mcp.tool()
|
| 565 |
+
# def get_location_info() -> dict:
|
| 566 |
# """
|
| 567 |
+
# Retourne la vérité absolue de la RAM : Localisation, Objets présents,
|
| 568 |
+
# Inventaire et Hash d'état.
|
|
|
|
|
|
|
| 569 |
# """
|
| 570 |
+
# try:
|
| 571 |
+
# game = get_game()
|
| 572 |
+
# env = game.env.env
|
| 573 |
+
|
| 574 |
+
# # 1. Récupération de la localisation précise
|
| 575 |
+
# loc = env.get_player_location()
|
| 576 |
+
# room_id = loc.num if hasattr(loc, 'num') else int(loc)
|
| 577 |
+
# room_obj = env.get_object(room_id)
|
| 578 |
+
# room_name = str(room_obj.name).replace('\x00', '').strip() if room_obj else "Unknown Room"
|
| 579 |
+
|
| 580 |
+
# # 2. Scan des objets présents dans la pièce (Vérité RAM)
|
| 581 |
+
# world_objs = env.get_world_objects()
|
| 582 |
+
# detected_objects = []
|
| 583 |
+
# for obj in world_objs:
|
| 584 |
+
# # On vérifie si l'objet est physiquement dans la pièce
|
| 585 |
+
# if obj.parent == room_id:
|
| 586 |
+
# name = str(obj.name).replace('\x00', '').strip()
|
| 587 |
+
# # On ignore le joueur et les objets sans nom
|
| 588 |
+
# if name and "player" not in name.lower():
|
| 589 |
+
# # On stocke le nom et l'ID pour le "Radar" de l'agent
|
| 590 |
+
# detected_objects.append({"name": name, "id": obj.num})
|
| 591 |
+
|
| 592 |
+
# # 3. Inventaire technique (Objets portés)
|
| 593 |
+
# inventory = []
|
| 594 |
+
# for obj in env.get_inventory():
|
| 595 |
+
# inv_name = str(obj.name).replace('\x00', '').strip()
|
| 596 |
+
# inventory.append({"name": inv_name, "id": obj.num})
|
| 597 |
+
|
| 598 |
+
# # 4. Hash d'état pour détecter tout changement atomique
|
| 599 |
+
# world_hash = env.get_world_state_hash()
|
| 600 |
+
|
| 601 |
+
# return {
|
| 602 |
+
# "status": "success",
|
| 603 |
+
# "location": {
|
| 604 |
+
# "id": room_id,
|
| 605 |
+
# "name": room_name
|
| 606 |
+
# },
|
| 607 |
+
# "detected_objects": detected_objects,
|
| 608 |
+
# "inventory": inventory,
|
| 609 |
+
# "world_hash": world_hash
|
| 610 |
+
# }
|
| 611 |
+
|
| 612 |
+
# except Exception as e:
|
| 613 |
+
# return {
|
| 614 |
+
# "status": "error",
|
| 615 |
+
# "message": str(e),
|
| 616 |
+
# "location": {"id": -1, "name": "Error"}
|
| 617 |
+
# }
|
| 618 |
|
| 619 |
+
@mcp.tool()
def get_current_room_name() -> str:
    """Fetch the room's official name from the game's memory."""
    try:
        game = get_game()
        env = game.env.env  # Jericho access

        # CONSISTENCY FIX: get_player_location() may return a Jericho object
        # rather than a bare id (get_location_info handles this); normalise
        # to the numeric id before the object lookup.
        loc = env.get_player_location()
        room_id = loc.num if hasattr(loc, 'num') else int(loc)

        room_obj = env.get_object(room_id)

        # Force str() so we never return raw bytes.
        return str(room_obj.name).strip()
    except Exception as e:
        return f"Error: {e}"
|
| 636 |
+
|
| 637 |
+
@mcp.tool()
def test_dictionary_access() -> str:
    """[TEST] Fetch a sample of the game's internal dictionary."""
    try:
        # All words known to the parser.
        words = get_game().env.env.get_dictionary()
        # Only show 20 entries so the log stays readable.
        sample = [entry.word for entry in words[:20]]
        return f"DICTIONARY SAMPLE ({len(words)} words total): " + ", ".join(sample)
    except Exception as e:
        return f"Dictionary Test Failed: {e}"
|
| 649 |
|
| 650 |
+
@mcp.tool()
def test_grammar_access() -> str:
    """[TEST] Fetch the action templates (grammar)."""
    try:
        # The verb grammar lives in the Jericho bindings table.
        grammar = get_game().env.env.bindings.get('grammar', 'No grammar found')
        # Truncate for display.
        return f"GRAMMAR SNIPPET: {grammar[:300]}..."
    except Exception as e:
        return f"Grammar Test Failed: {e}"
|
| 662 |
+
|
| 663 |
+
@mcp.tool()
def get_object_tree_simple() -> str:
    """[TEST] Raw dump of the first 5 world objects to inspect their structure."""
    try:
        world = get_game().env.env.get_world_objects()
        entries = [f"Obj{o.num}: {o.name} (Parent:{o.parent})" for o in world[:5]]
        return "OBJECT TREE SAMPLE: " + " | ".join(entries)
    except Exception as e:
        return f"Object Tree Failed: {e}"
|
| 675 |
# =============================================================================
|
| 676 |
+
# Main
|
| 677 |
# =============================================================================
|
| 678 |
|
| 679 |
if __name__ == "__main__":
|
|
|
|
| 680 |
mcp.run()
|
memory.py
ADDED
|
@@ -0,0 +1,257 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import re
|
| 3 |
+
from dataclasses import dataclass, asdict
|
| 4 |
+
from typing import List, Dict, Optional
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
@dataclass
class MemoryEntry:
    """One atomic fact remembered about a location."""
    category: str  # e.g. DANGER / MECHANIC / STATE / INFO
    type: str      # CORE / PERMANENT / EPHEMERAL (controls sort priority)
    title: str     # unique per location; used for upsert/supersede matching
    text: str      # the fact itself
    turn_created: int
    data_list: Optional[List[str]] = None
|
| 15 |
+
|
| 16 |
+
# Prompt used by HierarchicalMemoryManager.synthesize(). Literal JSON braces
# are doubled ({{ }}) so str.format only substitutes the four named fields.
# BUG FIX: the original had a duplicated assignment
# (MEMORY_SYNTHESIS_PROMPT = MEMORY_SYNTHESIS_PROMPT = """...""").
MEMORY_SYNTHESIS_PROMPT = """
[ROLE]
You are the Memory Architect. Your goal is to extract ONLY NEW and RELEVANT facts.

[INPUT]
- Location: {location}
- Action: {action}
- Result: {result}
- Known Memories: {existing}

[TASK]
Compare the 'Result' with 'Known Memories'.
- If the result contains NO NEW information or state changes, return {{"should_remember": false}}.
- If something changed or a new fact appeared, create a memory.
- Use the "supersedes" list to name the titles of old memories that are now obsolete.

[CRITICAL: NO DUPLICATES]
Do NOT extract information already present in 'Known Memories'.
Focus on state changes (e.g., door opened, item taken).

[OUTPUT FORMAT]
Return ONLY a JSON object. No markdown blocks, no explanations.
{{
  "should_remember": bool,
  "memories": [
    {{
      "title": "Unique Title",
      "text": "The new fact",
      "category": "DANGER"|"MECHANIC"|"STATE"|"INFO",
      "type": "PERMANENT"|"EPHEMERAL"|"CORE"
    }}
  ],
  "supersedes": ["Old Title"]
}}
"""
|
| 51 |
+
|
| 52 |
+
class HierarchicalMemoryManager:
    """
    Per-location memory store for the agent, persisted as a human-readable
    Markdown file. Entries are upserted by title, de-duplicated by content,
    and optionally synthesised from game transcripts via an LLM.
    """

    def __init__(self, call_llm_func, filepath="Memories.md"):
        # call_llm_func(prompt, system, seed=..., max_tokens=...) -> str
        self.call_llm = call_llm_func
        self.filepath = filepath
        self.memories: Dict[str, List[MemoryEntry]] = {}  # key: location name
        self.load_from_md()  # restore persisted memories at start-up

    def load_from_md(self):
        """Load memories from the Markdown file, if it exists."""
        if not os.path.exists(self.filepath):
            return

        current_loc = None
        # Matches: - [TYPE] [CATEGORY] **Title**: text
        pattern = re.compile(r'- \[(.*?)\] \[(.*?)\] \*\*(.*?)\*\*: (.*)')

        with open(self.filepath, 'r', encoding='utf-8') as f:
            for line in f:
                line = line.strip()
                if line.startswith("## Location:"):
                    current_loc = line.split(":", 1)[1].strip()
                    self.memories[current_loc] = []
                elif line.startswith("- [") and current_loc:
                    match = pattern.match(line)
                    if match:
                        mem_type, cat, title, text = match.groups()
                        self.memories[current_loc].append(MemoryEntry(
                            category=cat, type=mem_type, title=title, text=text, turn_created=0
                        ))

    def _is_redundant(self, location: str, new_text: str) -> bool:
        """Return True if the text already exists (fully or as a substring) at this location."""
        if location not in self.memories:
            return False

        new_text_clean = new_text.lower().strip()
        for m in self.memories[location]:
            existing_text = m.text.lower().strip()
            # Exact match, or the new info is already contained in an old entry.
            if new_text_clean == existing_text or new_text_clean in existing_text:
                return True
        return False

    def _upsert_memory(self, location: str, new_mem: MemoryEntry):
        """Replace an entry by title, or drop the insert if the content is redundant."""
        if location not in self.memories:
            self.memories[location] = []

        # Same meaning already stored: do nothing (avoids semantic duplicates).
        if self._is_redundant(location, new_mem.text):
            return

        # Title-based replacement (state update).
        self.memories[location] = [
            m for m in self.memories[location]
            if m.title != new_mem.title
        ]
        self.memories[location].append(new_mem)

    def update_inventory(self, items: List[str], step: int):
        """Refresh the special inventory section in the Markdown store."""
        loc = "GLOBAL_INVENTORY"
        text = ", ".join(items) if items else "Empty"
        self._upsert_memory(loc, MemoryEntry(
            category="PLAYER", type="EPHEMERAL",
            title="Current Inventory", text=text, turn_created=step
        ))
        self.save_to_md()

    def update_local_state(self, location: str, obs: dict, step: int):
        """Update the local memory from a StructuredObservation-style dict."""
        if location not in self.memories:
            self.memories[location] = []

        if obs.get("takeable_objects"):
            objs_text = ", ".join(obs["takeable_objects"])
            self._upsert_memory(location, MemoryEntry(
                category="ITEMS", type="EPHEMERAL",
                title="Visible Objects", text=objs_text, turn_created=step
            ))

        if obs.get("visible_exits"):
            exits_text = ", ".join(obs["visible_exits"])
            self._upsert_memory(location, MemoryEntry(
                category="MAP", type="CORE",
                title="Available Exits", text=exits_text, turn_created=step
            ))

        # Persist after every structured update.
        self.save_to_md()

    def save_to_md(self):
        """Write all memories to the Markdown file."""
        with open(self.filepath, 'w', encoding='utf-8') as f:
            f.write("# ZorkGPT Agent Memories\n\n")

            for loc, entries in sorted(self.memories.items()):
                if not entries:
                    continue
                f.write(f"## Location: {loc}\n")

                # Display order: CORE -> PERMANENT -> EPHEMERAL.
                entries.sort(key=lambda x: {"CORE": 0, "PERMANENT": 1, "EPHEMERAL": 2}.get(x.type, 3))

                for m in entries:
                    f.write(f"- [{m.type}] [{m.category}] **{m.title}**: {m.text}\n")
                f.write("\n")

    def get_context(self, location: str) -> str:
        """Return the formatted memory context for the LLM."""
        context_lines = []

        # ROBUSTNESS FIX: guard against an empty inventory bucket instead of
        # indexing [0] unconditionally.
        inv_entries = self.memories.get("GLOBAL_INVENTORY")
        if inv_entries:
            context_lines.append(f"🎒 CURRENT INVENTORY: {inv_entries[0].text}")

        if location in self.memories and self.memories[location]:
            context_lines.append(f"🧠 KNOWLEDGE OF {location.upper()}:")
            # CORE (the fixed scenery) first, then the rest.
            sorted_entries = sorted(
                self.memories[location],
                key=lambda x: {"CORE": 0, "PERMANENT": 1, "EPHEMERAL": 2}.get(x.type, 3)
            )
            for m in sorted_entries:
                context_lines.append(f" [{m.type}] {m.title}: {m.text}")
        else:
            context_lines.append(f"📍 You are in {location}. You have no previous memories here.")

        return "\n".join(context_lines)

    def synthesize(self, location: str, action: str, result: str, step: int):
        """Synthesise the action/result pair into memories via the LLM (robust JSON parsing)."""

        # Cheap filter to save LLM calls on trivial results.
        if len(result) < 40 and any(k in result.lower() for k in ["nothing", "taken", "dropped", "closed"]):
            return

        existing_txt = self.get_context(location)
        prompt = MEMORY_SYNTHESIS_PROMPT.format(
            location=location, action=action, result=result, existing=existing_txt
        )

        # BUG FIX: json was imported inside the try block while the except
        # clause referenced json.JSONDecodeError; import it up front. The
        # redundant local `import re` is gone (re is a module-level import).
        import json

        # BUG FIX: pre-bind response so the except handlers can print it even
        # when call_llm itself raises.
        response = ""
        try:
            response = self.call_llm(prompt, "You are a JSON Memory System.", seed=step, max_tokens=1000)

            # --- ROBUST EXTRACTION ---
            # Capture from the first '{' to the last '}' (group 1).
            json_match = re.search(r'(\{.*\})', response, re.DOTALL)
            if json_match:
                json_str = json_match.group(1)
            else:
                # Regex failed: strip Markdown fences manually.
                json_str = response.strip()
                if "```json" in json_str:
                    json_str = json_str.split("```json")[1].split("```")[0]
                elif "```" in json_str:
                    json_str = json_str.split("```")[1].split("```")[0]

            # Normalise "smart" quotes that LLMs frequently emit.
            json_str = json_str.replace('’', "'").replace('‘', "'")

            data = json.loads(json_str)

            # --- UPDATE LOGIC ---
            if data.get("should_remember"):
                if location not in self.memories:
                    self.memories[location] = []

                # Drop superseded entries by title.
                to_delete = data.get("supersedes", [])
                if to_delete:
                    self.memories[location] = [
                        m for m in self.memories[location]
                        if m.title not in to_delete
                    ]

                # Add the new memories.
                for item in data.get("memories", []):
                    if "title" not in item or "text" not in item:
                        continue

                    new_mem = MemoryEntry(
                        category=item.get("category", "INFO"),
                        type=item.get("type", "EPHEMERAL"),
                        title=item["title"],
                        text=item["text"],
                        turn_created=step
                    )

                    # Never add the same title twice in one location.
                    if not any(m.title == new_mem.title for m in self.memories[location]):
                        self.memories[location].append(new_mem)
                        print(f"💾 [MEMORY SAVED] [{new_mem.type}] {new_mem.title}")

                self.save_to_md()

        except json.JSONDecodeError as je:
            print(f"response MEMORY {response}")
            print(f"⚠️ JSON Format Error: {je}. Check LLM output.")
        except Exception as e:
            # BUG FIX: the original printed the response twice here.
            print(f"response MEMORY {response}")
            print(f"⚠️ Memory Synthesis Warning: {e}")
|
prompts.py
ADDED
|
@@ -0,0 +1,537 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# =============================================================================
|
| 2 |
+
# MAIN AGENT PROMPTS
|
| 3 |
+
# =============================================================================
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
# System prompt for the main ReAct game-playing agent.  It enumerates the MCP
# tools, survival heuristics, the THOUGHT/TOOL/ARGS response format, and a
# default exploration strategy.
# Fixes vs previous revision: "ligth torch" -> "light torch", stray quote in
# the 'play_action' format example, "sucess" -> "succeed", and rule 3 now uses
# the same tool name as the tool list ('get_valide_actions_cheat').
# NOTE(review): the examples use a "memory" tool that is not in the numbered
# tool list above — confirm it is actually registered on the MCP server.
SYSTEM_PROMPT = """You are an expert adventurer agent playing a text game via MCP. YOUR GOAL : Explore, maximize score, solve puzzles and collect items.

AVAILABLE TOOLS (use these via MCP):
1. play_action - Execute game commands (north,west,east,south,take torch,open mailbox)
2. get_valide_actions_cheat - Exact list of verbs/commands allowed right now, use it if you are stuck or actions didn't succeed in history.
3. detect_objects_cheat : List of all interactable objects hidden in text, use if you enter a new room or vague description.
4. get_map - See explored locations and connections, use if you are lost or want to check visited places.


### CRITICAL SURVIVAL RULES
1. COMBAT: If enemy present -> Use 'play_action' with "attack [enemy]".
2. DARKNESS : If dark -> Use 'play_action' with 'equip torch' or 'light torch'.
3. ERRORS: If your command fails TWICE, STOP. Use 'get_valide_actions_cheat'.
4. USE ITEMS FOR WHAT THEY ARE MEANT FOR : items are not dangerous if not said explicitly.
5. EXPLORE ALL DIRECTIONS WHEN STUCK
6. 'play_action' ONLY accepts the 'action' key. Example: {"action": "look"}
7. DO NOT add 'direction', 'item', or any other key to 'play_action'.
8. Directions must be part of the action string: {"action": "go north"} or {"action": "n"}

VALID GAME COMMANDS for play_action:
- Movement: north, south, east, west, ne, se, sw, nw, up, down, enter, exit, climb
- Objects: take <item>, drop <item>, open <thing>, close <thing>, examine <thing>, equip <thing>
- Light: turn on lamp, turn off lamp
- Combat: attack <enemy> with <weapon>
- Other: inventory, look, read <thing>, wait

FORBIDDEN (will NOT work): check, inspect, search, grab, use, help, move

### RESPONSE FORMAT

RESPOND IN THIS EXACT FORMAT (no markdown):
THOUGHT: <brief reasoning about what to do next>
TOOL: <tool_name>
ARGS: <JSON arguments>

Examples:
THOUGHT: I need to see what's around me.
TOOL: play_action
ARGS: {"action": "look"}

THOUGHT : Nothing happened last round and all my actions have zero impact, I should wait
TOOL : play_action
ARGS : {"action": "wait"}

THOUGHT: Let me check my current state and score.
TOOL: memory
ARGS: {}

THOUGHT: It's dark I need to equip torch
TOOL: play_action
ARGS: {"action":"get torch"}

THOUGHT: The mailbox might contain something useful.
TOOL: play_action
ARGS: {"action": "open mailbox"}

STRATEGY:
1. Start by looking around and checking memory
2. Explore systematically - try all directions : w,s,n,e,ne,sw,nw,se
3. Pick up useful items (lamp, sword, etc.) all the time you can
4. Open containers (mailbox, window, etc.)
5. Use get_map to avoid getting lost
6. equip torch or turn on lamp before dark areas!
7. If you can't move to a new location try to wait

THINK OF RESULT BEFORE TAKING ACTION

DO NOT repeat the same action multiple times in a row.
"""
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
# =============================================================================
|
| 78 |
+
# STRATEGY PROMPTS
|
| 79 |
+
# =============================================================================
|
| 80 |
+
|
| 81 |
+
PLANNER_SYSTEM_PROMPT = """You are the Strategic Planner for a text adventure game.
|
| 82 |
+
Your job is NOT to play the game, but to guide the player with a high-level plan.
|
| 83 |
+
|
| 84 |
+
GOAL: Analyze the current situation and create a concise step-by-step plan for the next few moves.
|
| 85 |
+
|
| 86 |
+
INPUT DATA:
|
| 87 |
+
- Map/Location: Where the player is.
|
| 88 |
+
- Inventory: What they have.
|
| 89 |
+
- History: What they just did.
|
| 90 |
+
|
| 91 |
+
OUTPUT FORMAT (JSON only):
|
| 92 |
+
{
|
| 93 |
+
"reasoning": "Brief analysis of the situation",
|
| 94 |
+
"current_objective": "The main immediate goal (e.g., 'Enter the white house')",
|
| 95 |
+
"suggested_steps": [
|
| 96 |
+
"step 1",
|
| 97 |
+
"step 2",
|
| 98 |
+
"step 3"
|
| 99 |
+
]
|
| 100 |
+
}
|
| 101 |
+
|
| 102 |
+
|
| 103 |
+
Make the plan logical. If the player is stuck, suggest a different direction.
|
| 104 |
+
"""
|
| 105 |
+
# =============================================================================
|
| 106 |
+
# EXTRACTOR PROMPTS
|
| 107 |
+
# =============================================================================
|
| 108 |
+
|
| 109 |
+
EXTRACTOR_SYSTEM_PROMPT = """
|
| 110 |
+
**Task:** Extract semantic mapping and environmental clues from Text Adventure logs.
|
| 111 |
+
**Output:** Return ONLY a valid JSON object. No prose.
|
| 112 |
+
|
| 113 |
+
**1. name_translation :**
|
| 114 |
+
- Purpose: Map technical RAM names (e.g., 'lpig', 'brokstair') to natural nouns found ONLY in the RAW GAME TEXT.
|
| 115 |
+
- Rule 1: Point-to-Word. If the word "pig" is in the text and 'lpig' is in RAM, map {"lpig": "pig"}.
|
| 116 |
+
- Rule 2: Zero Inference. If the text says "the animal" but RAM says 'lpig', DO NOT map it.
|
| 117 |
+
- Rule 3: Nouns Only. No "shiny torch", just "torch".
|
| 118 |
+
|
| 119 |
+
**STRICT NAME MAPPING RULES (ZERO-TOLERANCE):**
|
| 120 |
+
1. **TEXTUAL PROOF REQUIRED**: A RAM object MUST remain by its technical name (e.g., 'torchobj') if the EXACT noun is not visible in the "RAW GAME TEXT".
|
| 121 |
+
2. **THE "EXISTS" TEST**: For every item in `name_translation`, you must be able to point to the exact word in the text.
|
| 122 |
+
3. **ONLY NOUNS**: No "old", "small", "shiny". Just the core noun found in the text.
|
| 123 |
+
|
| 124 |
+
|
| 125 |
+
**2. takeable_objects & interactable_features:**
|
| 126 |
+
- Use the RAM list to cross-reference.
|
| 127 |
+
- 'takeable_objects': Small, portable technical names (e.g., 'torchobj', 'key7').
|
| 128 |
+
- 'interactable_features': Technical names of fixed scenery (e.g., 'wall_west', 'bigtree') mentioned in the text.
|
| 129 |
+
|
| 130 |
+
**3. visible_exits:**
|
| 131 |
+
- Scan text for cardinal directions (north, south, east, west, up, down, northeast, northwest, southeast, southwest, in, out).
|
| 132 |
+
- Ignore descriptions (e.g., "a path to the north" -> "north").
|
| 133 |
+
|
| 134 |
+
**4. puzzle_clues:**
|
| 135 |
+
- Identify technical objects starting with '(missing...)'.
|
| 136 |
+
- Extract hints, riddles, or sensory warnings (sounds, tracks, smells) from the text.
|
| 137 |
+
|
| 138 |
+
**STRICT CONSTRAINTS:**
|
| 139 |
+
- If a RAM object is NOT in the text, it MUST be ignored in 'name_translation'.
|
| 140 |
+
- No hallucination: Do not invent objects.
|
| 141 |
+
- Output MUST be strictly valid JSON.
|
| 142 |
+
- If the scene is complex, prioritize 'name_translation' and 'visible_exits'.
|
| 143 |
+
|
| 144 |
+
**JSON Schema:**
|
| 145 |
+
{
|
| 146 |
+
"name_translation": {"tech_name": "natural_name"},
|
| 147 |
+
"takeable_objects": ["tech_name"],
|
| 148 |
+
"interactable_features": ["tech_name"],
|
| 149 |
+
"visible_exits": ["direction"],
|
| 150 |
+
"puzzle_clues": ["text hint"]
|
| 151 |
+
}
|
| 152 |
+
|
| 153 |
+
"""
|
| 154 |
+
|
| 155 |
+
|
| 156 |
+
|
| 157 |
+
|
| 158 |
+
# =============================================================================
|
| 159 |
+
# CRITIC PROMPTS
|
| 160 |
+
# =============================================================================
|
| 161 |
+
|
| 162 |
+
CRITIC_SYSTEM_PROMPT = """
|
| 163 |
+
You are an Interactive Fiction Game Critic evaluating AI agent actions in Text game.
|
| 164 |
+
|
| 165 |
+
LET THE AGENT "X TORCH" (EXAMINE THE TORCH) WHENEVER IT WANTS
|
| 166 |
+
|
| 167 |
+
Core Game Mechanics :
|
| 168 |
+
"Taken" = SUCCESS (item successfully picked up, agent can continue normally)
|
| 169 |
+
Parser failures: "I don't understand", "You can't do that", "can't see any such thing", "I don't know such a thing"
|
| 170 |
+
Movement blocks: "There is a wall there", "too narrow", "can't go that way", "Not allowed there"
|
| 171 |
+
|
| 172 |
+
Feedback Taxonomy (align with agent's guidance):
|
| 173 |
+
|
| 174 |
+
Hard Rejection: "I don't understand", "You can't", "There is no X" → Stop exact repetition
|
| 175 |
+
Soft Rejection: "Too dark", "Can't reach", "Too heavy" → Solve prerequisite needed
|
| 176 |
+
Puzzle Feedback: Dynamic/unusual responses → Encourage systematic experimentation
|
| 177 |
+
Combat Feedback: Hits, misses, wounds, dodges → Encourage repeated attacks until resolution
|
| 178 |
+
Success with Complications: Action succeeds but triggers something → Chain reaction
|
| 179 |
+
|
| 180 |
+
Context Provided: You will receive:
|
| 181 |
+
|
| 182 |
+
Current inventory: Items the agent is currently carrying (critical for evaluating item-based actions)
|
| 183 |
+
Available exits: GROUND TRUTH list of valid exits from game engine (use for movement validation - these are 100% accurate)
|
| 184 |
+
Location-specific failures: Actions marked "IMPORTANT" have previously FAILED at this exact location
|
| 185 |
+
Global action counts: How many times an action has been attempted across all locations
|
| 186 |
+
Recent action history: Last 3 action/response pairs showing immediate context and patterns
|
| 187 |
+
|
| 188 |
+
CRITICAL - No Information Leakage: Your justifications will be shown to the agent when actions are rejected.
|
| 189 |
+
You have god-like knowledge (ground-truth exits, inventory visibility, object tree) that the agent should NOT learn from your feedback. The agent must discover the world organically through gameplay.
|
| 190 |
+
|
| 191 |
+
Evaluation Criteria :
|
| 192 |
+
|
| 193 |
+
Context Relevance: Does action match current state? Objects mentioned in descriptions ARE present and interactable.
|
| 194 |
+
Progress Potential: Will it advance gameplay, solve puzzles, or increase score?
|
| 195 |
+
Information Gathering: For new situations, does it explore or examine appropriately?
|
| 196 |
+
Parser Compatibility: Is command syntactically valid (VERB-NOUN format)?
|
| 197 |
+
Creative use of inventory: Reward using items to modify environment or solve prerequisites.
|
| 198 |
+
|
| 199 |
+
Actions evaluations :
|
| 200 |
+
|
| 201 |
+
High Score (+0.6 to +0.8):
|
| 202 |
+
|
| 203 |
+
Verb relates to emphasized room property (ECHO for "echoing room", COOL for "hot room")
|
| 204 |
+
Standard actions already failed with puzzle feedback (showing graduated approach)
|
| 205 |
+
Thematic item-container matching (sword in armory, treasure in decorative case)
|
| 206 |
+
|
| 207 |
+
Moderate Score (+0.2 to +0.4):
|
| 208 |
+
Creative verb but standard actions not tried yet (premature complexity)
|
| 209 |
+
Environmental verb somewhat related but indirect
|
| 210 |
+
Reasonable experimentation without clear puzzle indicators
|
| 211 |
+
|
| 212 |
+
Low/Negative Score (-0.3 to -0.8):
|
| 213 |
+
Nonsense verbs (BANANA, PURPLE) unrelated to context
|
| 214 |
+
Environmental verb contradicts context (FREEZE in hot room)
|
| 215 |
+
Jumping to complex solutions without trying basics
|
| 216 |
+
|
| 217 |
+
Provide JSON response with:
|
| 218 |
+
|
| 219 |
+
score: -1.0 to +1.0 scale
|
| 220 |
+
Negative (-1.0 to -0.1): Harmful, repetitive, or nonsensical
|
| 221 |
+
Neutral (0.0): No clear benefit or harm
|
| 222 |
+
Positive (+0.1 to +1.0): Useful exploration, problem-solving, or progress
|
| 223 |
+
justification: Single-line explanation (no newlines)
|
| 224 |
+
confidence: 0.0 to 1.0 (your certainty in THIS EVALUATION, not action success)
|
| 225 |
+
0.9-1.0: Clear evaluation (standard actions with object tree validation)
|
| 226 |
+
0.6-0.8: Moderate certainty (experimental actions with environmental match)
|
| 227 |
+
0.3-0.5: Low certainty (novel situation, insufficient context)
|
| 228 |
+
Note: Experimental actions should generally have lower confidence (0.6-0.7) since outcomes are uncertain
|
| 229 |
+
Example Responses:
|
| 230 |
+
|
| 231 |
+
Standard action: {"score": 0.7, "justification": "Taking visible object is productive and aligns with inventory collection goals.", "confidence": 0.9}
|
| 232 |
+
|
| 233 |
+
Experimental verb (with puzzle context): {"score": 0.7, "justification": "Verb selection matches emphasized room properties after standard actions failed (systematic experimentation).", "confidence": 0.65}
|
| 234 |
+
|
| 235 |
+
Nonsense action: {"score": -0.8, "justification": "Action appears random and unrelated to current context or environmental clues.", "confidence": 0.8}
|
| 236 |
+
|
| 237 |
+
Premature complexity: {"score": 0.2, "justification": "Creative verb but would benefit from trying standard actions first (graduated approach).", "confidence": 0.7}
|
| 238 |
+
|
| 239 |
+
Prerequisite chain: {"score": 0.9, "justification": "Addressing environmental constraint before object interaction demonstrates multi-step reasoning.", "confidence": 0.85}
|
| 240 |
+
|
| 241 |
+
Combat action (repeated attack): {"score": 0.8, "justification": "Continuing attack during active combat shows appropriate persistence (repeated attacks with changing combat feedback).", "confidence": 0.85}
|
| 242 |
+
|
| 243 |
+
Focus solely on evaluating the proposed action's merit given the current state.
|
| 244 |
+
|
| 245 |
+
"""
|
| 246 |
+
|
| 247 |
+
|
| 248 |
+
|
| 249 |
+
"""
|
| 250 |
+
You are an Interactive Fiction Game Critic evaluating AI agent actions in Text game.
|
| 251 |
+
|
| 252 |
+
Core Game Mechanics:
|
| 253 |
+
|
| 254 |
+
"Taken" = SUCCESS (item successfully picked up, agent can continue normally)
|
| 255 |
+
Parser failures: "I don't understand", "You can't do that", "can't see any such thing"
|
| 256 |
+
Movement blocks: "There is a wall there", "too narrow", "can't go that way"
|
| 257 |
+
|
| 258 |
+
PUZZLE RECOGNITION:
|
| 259 |
+
|
| 260 |
+
Learn to identify when the agent is in "puzzle mode" vs "exploration mode" to evaluate experimental actions appropriately.
|
| 261 |
+
|
| 262 |
+
Indicators requiring special evaluation in recent action history:
|
| 263 |
+
|
| 264 |
+
Puzzle Situations:
|
| 265 |
+
|
| 266 |
+
Puzzle Feedback: Unusual responses (vibrating, echoing, phasing, transforming) - NOT hard rejections
|
| 267 |
+
Soft Rejections: Environmental constraints ("too dark", "too hot", "can't reach")
|
| 268 |
+
Environmental Emphasis: Room descriptions that stress properties (loud, cold, dark, narrow, glowing)
|
| 269 |
+
Standard Action Failures: Basic commands (TAKE, EXAMINE, OPEN) producing dynamic responses
|
| 270 |
+
|
| 271 |
+
Combat Situations:
|
| 272 |
+
|
| 273 |
+
Combat actions: Agent using "attack X with Y" format
|
| 274 |
+
Combat feedback: Responses mentioning hits, misses, wounds, dodges, strikes
|
| 275 |
+
Enemy presence: References to creatures/NPCs (troll, thief, grue, dragon, etc.)
|
| 276 |
+
Weapon indicators: Mentions of sword glowing, weapons being wielded
|
| 277 |
+
Key principle: Repeated attack commands are REQUIRED and should be REWARDED during active combat
|
| 278 |
+
|
| 279 |
+
Feedback Taxonomy (align with agent's guidance):
|
| 280 |
+
|
| 281 |
+
Hard Rejection: "I don't understand", "You can't", "There is no X" → Stop exact repetition
|
| 282 |
+
Soft Rejection: "Too dark", "Can't reach", "Too heavy" → Solve prerequisite needed
|
| 283 |
+
Puzzle Feedback: Dynamic/unusual responses → Encourage systematic experimentation
|
| 284 |
+
Combat Feedback: Hits, misses, wounds, dodges → Encourage repeated attacks until resolution
|
| 285 |
+
Success with Complications: Action succeeds but triggers something → Chain reaction
|
| 286 |
+
|
| 287 |
+
Examples:
|
| 288 |
+
|
| 289 |
+
Puzzle Feedback:
|
| 290 |
+
|
| 291 |
+
Action history: "TAKE CRYSTAL" → "The crystal vibrates and phases in and out of existence"
|
| 292 |
+
Analysis: Type 3 (puzzle feedback). Not a failure - this is a CLUE
|
| 293 |
+
Evaluation mode: Reward continued experimentation with different approaches
|
| 294 |
+
|
| 295 |
+
Combat Feedback:
|
| 296 |
+
|
| 297 |
+
Action history: "attack troll with sword" → "Your blow glances off the troll's thick hide"
|
| 298 |
+
Analysis: Type 4 (combat feedback). Combat in progress, changing outcomes each turn
|
| 299 |
+
Evaluation mode: Reward continued attack actions until combat resolves
|
| 300 |
+
|
| 301 |
+
Context Provided: You will receive:
|
| 302 |
+
|
| 303 |
+
Current inventory: Items the agent is currently carrying (critical for evaluating item-based actions)
|
| 304 |
+
Available exits: GROUND TRUTH list of valid exits from game engine (use for movement validation - these are 100% accurate)
|
| 305 |
+
Location-specific failures: Actions marked "IMPORTANT" have previously FAILED at this exact location
|
| 306 |
+
Global action counts: How many times an action has been attempted across all locations
|
| 307 |
+
Recent action history: Last 3 action/response pairs showing immediate context and patterns
|
| 308 |
+
|
| 309 |
+
CRITICAL - No Information Leakage: Your justifications will be shown to the agent when actions are rejected. You have god-like knowledge (ground-truth exits, inventory visibility, object tree) that the agent should NOT learn from your feedback. The agent must discover the world organically through gameplay.
|
| 310 |
+
|
| 311 |
+
When writing justifications, NEVER reveal:
|
| 312 |
+
|
| 313 |
+
Specific exit lists (e.g., "valid exits are [north, south]")
|
| 314 |
+
That information came from "game engine" or "ground truth" or "available exits list"
|
| 315 |
+
Definitive certainty like "will definitely fail" or "guaranteed valid"
|
| 316 |
+
|
| 317 |
+
Instead, use vague language:
|
| 318 |
+
|
| 319 |
+
✅ "This direction is likely invalid for the current location"
|
| 320 |
+
✅ "Movement in this direction appears problematic"
|
| 321 |
+
✅ "This action seems inconsistent with the current state"
|
| 322 |
+
❌ "Direction not in available exits list [north, south, west]"
|
| 323 |
+
❌ "Game engine confirms this is invalid"
|
| 324 |
+
❌ "Will definitely fail - engine confirms invalid"
|
| 325 |
+
|
| 326 |
+
Evaluation Criteria:
|
| 327 |
+
|
| 328 |
+
Context Relevance: Does action match current state? Objects mentioned in descriptions ARE present and interactable.
|
| 329 |
+
|
| 330 |
+
Progress Potential: Will it advance gameplay, solve puzzles, or increase score?
|
| 331 |
+
|
| 332 |
+
Information Gathering: For new situations, does it explore or examine appropriately?
|
| 333 |
+
|
| 334 |
+
Parser Compatibility: Is command syntactically valid (VERB-NOUN format)?
|
| 335 |
+
|
| 336 |
+
Problem Solving & Experimental Actions:
|
| 337 |
+
|
| 338 |
+
Environmental Verb Evaluation:
|
| 339 |
+
|
| 340 |
+
High Score (+0.6 to +0.8):
|
| 341 |
+
Verb relates to emphasized room property (ECHO for "echoing room", COOL for "hot room")
|
| 342 |
+
Standard actions already failed with puzzle feedback (showing graduated approach)
|
| 343 |
+
Thematic item-container matching (sword in armory, treasure in decorative case)
|
| 344 |
+
|
| 345 |
+
Moderate Score (+0.2 to +0.4):
|
| 346 |
+
Creative verb but standard actions not tried yet (premature complexity)
|
| 347 |
+
Environmental verb somewhat related but indirect
|
| 348 |
+
Reasonable experimentation without clear puzzle indicators
|
| 349 |
+
|
| 350 |
+
Low/Negative Score (-0.3 to -0.8):
|
| 351 |
+
Nonsense verbs (BANANA, PURPLE) unrelated to context
|
| 352 |
+
Environmental verb contradicts context (FREEZE in hot room)
|
| 353 |
+
Jumping to complex solutions without trying basics
|
| 354 |
+
|
| 355 |
+
Graduated Complexity Protocol: Check recent action history for:
|
| 356 |
+
Have standard actions been tried? (TAKE, EXAMINE, OPEN, USE)
|
| 357 |
+
Have synonym variations been attempted? (GET, GRAB, STUDY)
|
| 358 |
+
Do environmental clues support this verb? (room description matches verb)
|
| 359 |
+
Is this addressing soft rejection prerequisites? (LIGHT LAMP after "too dark")
|
| 360 |
+
|
| 361 |
+
Scoring Examples:
|
| 362 |
+
Room: "crackling electricity" → Agent: DISCHARGE → +0.7 (environmental match)
|
| 363 |
+
Room: "cold" → Agent: HEAT → +0.6 (thematic verb)
|
| 364 |
+
Room: "cold" → Agent: BANANA → -0.8 (nonsense)
|
| 365 |
+
No puzzle feedback yet → Agent: STABILIZE → +0.2 (creative but try standard first)
|
| 366 |
+
Standard actions got puzzle feedback → Agent: ECHO → +0.8 (protocol adherence)
|
| 367 |
+
Recent "too dark" → Agent: LIGHT LAMP → +0.9 (prerequisite chain recognition)
|
| 368 |
+
|
| 369 |
+
Creative use of inventory: Reward using items to modify environment or solve prerequisites.
|
| 370 |
+
|
| 371 |
+
Combat Action Evaluation:
|
| 372 |
+
|
| 373 |
+
High Score (+0.7 to +0.9):
|
| 374 |
+
Repeated "attack X with Y" during active combat (each turn has different combat feedback)
|
| 375 |
+
Using appropriate weapon for combat situation
|
| 376 |
+
Continuing combat until enemy defeated/fled (shows persistence)
|
| 377 |
+
|
| 378 |
+
Moderate Score (+0.4 to +0.6):
|
| 379 |
+
First attack action when enemy present (initiating combat)
|
| 380 |
+
Switching weapons during combat (tactical adjustment)
|
| 381 |
+
|
| 382 |
+
Low Score (-0.3 to -0.6):
|
| 383 |
+
Non-combat actions during active combat (checking inventory, examining items)
|
| 384 |
+
Fleeing from winnable combat without attempting attack
|
| 385 |
+
Note: Agent guidance prioritizes combat actions during fights
|
| 386 |
+
|
| 387 |
+
Combat Recognition: Check recent responses for combat feedback (hits/misses/wounds), enemy names, or weapon/combat verbs.
|
| 388 |
+
|
| 389 |
+
Anti-Repetition (CRITICAL - Distinguish Loops from Experimentation):
|
| 390 |
+
|
| 391 |
+
SEVERELY PENALIZE (-0.8 to -1.0):
|
| 392 |
+
Exact action repetition: Same command string repeated with hard rejection
|
| 393 |
+
Actions with "IMPORTANT" warnings (already failed at this exact location)
|
| 394 |
+
Actions repeated 3+ times globally with identical hard rejections
|
| 395 |
+
Oscillation patterns (A→B→A→B) showing stuck loops
|
| 396 |
+
|
| 397 |
+
DO NOT PENALIZE (Distinguish from Repetition):
|
| 398 |
+
Systematic experimentation: TAKE→GET→GRAB→ECHO (different verbs, same object)
|
| 399 |
+
Protocol adherence: Trying synonyms before environmental verbs (graduated approach)
|
| 400 |
+
Learning attempts: Each action gets NEW/DIFFERENT feedback (puzzle exploration)
|
| 401 |
+
Active combat: Repeated "attack X with Y" during combat with changing feedback (hits, misses, wounds)
|
| 402 |
+
|
| 403 |
+
REWARD (+0.3 to +0.5):
|
| 404 |
+
Breaking from repetitive patterns
|
| 405 |
+
Exploring new directions or objects after stuck
|
| 406 |
+
Trying different approach after hard rejection (not same exact action)
|
| 407 |
+
|
| 408 |
+
Key Distinction: Count exact command string repetitions, NOT object mention frequency. Example: "TAKE SPHERE" (fail) → "GET SPHERE" (fail) → "DISCHARGE" = systematic, NOT repetition.
|
| 409 |
+
|
| 410 |
+
Movement Validation (CRITICAL - Follow This Logic Exactly):
|
| 411 |
+
|
| 412 |
+
Step 1: Check the "Available exits" list first
|
| 413 |
+
If the proposed direction IS in the "Available exits" list → APPROVE IT (Score +0.5 to +0.8)
|
| 414 |
+
If the proposed direction is NOT in the "Available exits" list → REJECT IT (Score -0.7 to -1.0)
|
| 415 |
+
|
| 416 |
+
Step 2: Only reject exits that are:
|
| 417 |
+
Standard directions NOT in the available exits list (north, south, east, west, up, down, etc.)
|
| 418 |
+
Nonsensical directions (e.g., "purple", "banana")
|
| 419 |
+
Already failed in recent history ("can't go that way" just received)
|
| 420 |
+
|
| 421 |
+
Step 3: Use vague language ONLY when rejecting invalid movements:
|
| 422 |
+
✅ "Movement in this direction appears problematic" (for exits NOT in list)
|
| 423 |
+
❌ DO NOT use vague rejection language for exits that ARE in the available exits list
|
| 424 |
+
|
| 425 |
+
IMPORTANT: The "Available exits" list is 100% accurate ground truth. If a direction appears in this list, it WILL work. Approve it with a positive score unless there are other compelling reasons to reject (e.g., already failed at this specific location).
|
| 426 |
+
|
| 427 |
+
NOTE: The "Available exits" list is authoritative ground truth for YOUR evaluation only - do not mention this in justifications.
|
| 428 |
+
|
| 429 |
+
Risk Assessment: Avoid unnecessary danger without clear reward potential.
|
| 430 |
+
|
| 431 |
+
JUSTIFICATION GUIDELINES:
|
| 432 |
+
|
| 433 |
+
Your justifications will be shown to the agent. They must praise METHODOLOGY without revealing OUTCOMES.
|
| 434 |
+
|
| 435 |
+
Safe Justification Templates:
|
| 436 |
+
|
| 437 |
+
For environmental verbs:
|
| 438 |
+
|
| 439 |
+
✅ "Action matches environmental properties mentioned in room description (methodical experimentation)"
|
| 440 |
+
✅ "Trying alternative verbs after standard actions failed demonstrates systematic approach"
|
| 441 |
+
✅ "Verb selection shows environmental observation and logical inference"
|
| 442 |
+
❌ "This will solve the puzzle" (reveals outcome)
|
| 443 |
+
❌ "This is the correct solution for this room" (reveals success)
|
| 444 |
+
|
| 445 |
+
For thematic containers:
|
| 446 |
+
|
| 447 |
+
✅ "Trying thematic item-container pairing shows pattern recognition"
|
| 448 |
+
✅ "Matching item type to container purpose demonstrates creative problem-solving"
|
| 449 |
+
❌ "Trophy case needs treasures" (reveals mechanic)
|
| 450 |
+
|
| 451 |
+
For prerequisite chains:
|
| 452 |
+
|
| 453 |
+
✅ "Addressing environmental constraint before retrying shows logical sequencing"
|
| 454 |
+
✅ "Solving prerequisite first demonstrates multi-step reasoning"
|
| 455 |
+
❌ "You need light source first, then you can take the scroll" (too specific)
|
| 456 |
+
|
| 457 |
+
For systematic experimentation:
|
| 458 |
+
|
| 459 |
+
✅ "Graduated approach from standard to creative verbs shows protocol adherence"
|
| 460 |
+
✅ "Exploring different verb options after initial failures is sound methodology"
|
| 461 |
+
❌ "Keep trying, you're close to the solution" (reveals proximity)
|
| 462 |
+
|
| 463 |
+
For combat actions:
|
| 464 |
+
|
| 465 |
+
✅ "Continuing attack during active combat shows appropriate persistence"
|
| 466 |
+
✅ "Prioritizing combat actions when enemy is present follows correct protocol"
|
| 467 |
+
✅ "Repeated attack attempts with changing combat feedback demonstrates engagement with combat mechanics"
|
| 468 |
+
❌ "The troll has 2 hit points left, keep attacking" (reveals game state)
|
| 469 |
+
❌ "You'll defeat the enemy in 3 more attacks" (reveals outcome)
|
| 470 |
+
|
| 471 |
+
Focus: Praise the REASONING QUALITY, not the action's likelihood of success.
|
| 472 |
+
|
| 473 |
+
Output Requirements:
|
| 474 |
+
|
| 475 |
+
Provide JSON response with:
|
| 476 |
+
|
| 477 |
+
score: -1.0 to +1.0 scale
|
| 478 |
+
Negative (-1.0 to -0.1): Harmful, repetitive, or nonsensical
|
| 479 |
+
Neutral (0.0): No clear benefit or harm
|
| 480 |
+
Positive (+0.1 to +1.0): Useful exploration, problem-solving, or progress
|
| 481 |
+
justification: Single-line explanation (no newlines)
|
| 482 |
+
confidence: 0.0 to 1.0 (your certainty in THIS EVALUATION, not action success)
|
| 483 |
+
0.9-1.0: Clear evaluation (standard actions with object tree validation)
|
| 484 |
+
0.6-0.8: Moderate certainty (experimental actions with environmental match)
|
| 485 |
+
0.3-0.5: Low certainty (novel situation, insufficient context)
|
| 486 |
+
Note: Experimental actions should generally have lower confidence (0.6-0.7) since outcomes are uncertain
|
| 487 |
+
|
| 488 |
+
Example Responses:
|
| 489 |
+
|
| 490 |
+
Standard action: {"score": 0.7, "justification": "Taking visible object is productive and aligns with inventory collection goals.", "confidence": 0.9}
|
| 491 |
+
|
| 492 |
+
Experimental verb (with puzzle context): {"score": 0.7, "justification": "Verb selection matches emphasized room properties after standard actions failed (systematic experimentation).", "confidence": 0.65}
|
| 493 |
+
|
| 494 |
+
Nonsense action: {"score": -0.8, "justification": "Action appears random and unrelated to current context or environmental clues.", "confidence": 0.8}
|
| 495 |
+
|
| 496 |
+
Premature complexity: {"score": 0.2, "justification": "Creative verb but would benefit from trying standard actions first (graduated approach).", "confidence": 0.7}
|
| 497 |
+
|
| 498 |
+
Prerequisite chain: {"score": 0.9, "justification": "Addressing environmental constraint before object interaction demonstrates multi-step reasoning.", "confidence": 0.85}
|
| 499 |
+
|
| 500 |
+
Combat action (repeated attack): {"score": 0.8, "justification": "Continuing attack during active combat shows appropriate persistence (repeated attacks with changing combat feedback).", "confidence": 0.85}
|
| 501 |
+
|
| 502 |
+
Focus solely on evaluating the proposed action's merit given the current state.
|
| 503 |
+
"""
|
| 504 |
+
|
| 505 |
+
|
| 506 |
+
|
| 507 |
+
|
| 508 |
+
"""You are a strict Logic Critic for a text adventure game.
|
| 509 |
+
Your goal is to validate if a proposed action makes sense given the current situation.
|
| 510 |
+
|
| 511 |
+
INPUT:
|
| 512 |
+
- Current Observation (what the player sees)
|
| 513 |
+
- Inventory
|
| 514 |
+
- History
|
| 515 |
+
- Proposed Action
|
| 516 |
+
|
| 517 |
+
CRITERIA:
|
| 518 |
+
1. Physical Possibility: Can't "take" things not visible. Can't "open" things not present.
|
| 519 |
+
2. Repetition: Penalize actions done repeatedly with no result.
|
| 520 |
+
3. Safety: Warn against obviously fatal actions (jumping off cliffs, eating poison).
|
| 521 |
+
4. Logic: Does the action fit the game state?
|
| 522 |
+
|
| 523 |
+
OUTPUT FORMAT (JSON only):
|
| 524 |
+
{
|
| 525 |
+
"score": <float 0.0 to 1.0, where 0.0 is reject and 1.0 is approve>,
|
| 526 |
+
"justification": "<brief reason>",
|
| 527 |
+
"is_fatal": <bool>
|
| 528 |
+
}
|
| 529 |
+
"""
|
| 530 |
+
|
| 531 |
+
# =============================================================================
|
| 532 |
+
# MEMORY PROMPTS
|
| 533 |
+
# =============================================================================
|
| 534 |
+
|
| 535 |
+
MEMORY_SYNTHESIS_PROMPT = """You are the Memory Architect...
|
| 536 |
+
...
|
| 537 |
+
"""
|
spatial_memory.py
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
spatial_memory.py
|
| 3 |
+
Analyseur de mouvements et générateur de contexte spatial.
|
| 4 |
+
"""
|
| 5 |
+
import os
from typing import Optional

from map_graph import MapGraph
|
| 7 |
+
|
| 8 |
+
class SpatialMemorySystem:
    """Movement analyzer and spatial-context generator.

    Wraps a :class:`MapGraph`: each call to :meth:`update` compares the
    previous room to the current one to learn either a verified connection
    (the move succeeded) or a blocked exit (the move failed), then persists
    the map to disk.
    """

    # Recognized movement tokens, with abbreviations mapped to their
    # canonical form. Normalizing here guarantees the graph never stores
    # "n" and "north" as two distinct exits of the same room, which would
    # also corrupt the "unexplored directions" computation below.
    _DIRECTION_ALIASES = {
        "north": "north", "n": "north",
        "south": "south", "s": "south",
        "east": "east", "e": "east",
        "west": "west", "w": "west",
        "ne": "ne", "nw": "nw", "se": "se", "sw": "sw",
        "up": "up", "down": "down",
        "enter": "enter", "exit": "exit",
    }

    def __init__(self, map_file: str = "zork_map_data.json"):
        # Persistent graph of rooms and connections; reloaded at startup so
        # the map survives across sessions.
        self.graph = MapGraph()
        self.map_file = map_file
        self.last_id = None       # room id observed on the previous update
        self.last_action = None   # action that led to the current state
        self.load()

    def update(self, current_id: int, current_name: str, last_action: str) -> None:
        """Core of the system: analyze the state change since the last call.

        Records the current room, then — if the previous action was a
        movement — registers either a verified connection (room changed)
        or a wall/dead end (room unchanged). The map is saved after every
        update.
        """
        # 1. Register / refresh the current room.
        self.graph.add_or_update_room(current_id, current_name)

        # 2. Movement analysis: only meaningful once a previous room exists.
        if self.last_id is not None and last_action:
            # Reduce the action to its canonical direction, if any.
            direction = self._extract_direction(last_action)

            if direction:
                if current_id != self.last_id:
                    # Success: we moved -> create/verify the connection.
                    self.graph.add_connection(self.last_id, current_id, direction)
                else:
                    # Failure: same room after a move attempt -> it's a wall.
                    self.graph.record_failure(self.last_id, direction)

        # 3. Automatic persistence.
        self.save()

        # Remember this state for the next transition analysis.
        self.last_id = current_id
        self.last_action = last_action

    def get_context_for_llm(self, current_id: int) -> str:
        """Render the 'Spatial Intelligence' text injected into the prompt."""
        if current_id not in self.graph.rooms:
            return "Spatial Info: analyzing..."

        room = self.graph.rooms[current_id]

        # Verified exits (100% safe to take).
        verified = [f"{d}->{self.graph.rooms[c.to_id].name}"
                    for d, c in room.exits.items() if c.is_verified]

        # Inferred exits (probable, still to be tested).
        inferred = [f"{d}?" for d, c in room.exits.items() if not c.is_verified]

        # Walls (do not retry).
        blocked = [d for d, count in room.failed_exits.items() if count >= 1]

        # Unexplored directions (exploration candidates). Directions are
        # stored canonically, so set subtraction is reliable.
        cardinals = {"north", "south", "east", "west", "up", "down", "ne", "nw", "se", "sw"}
        known = set(room.exits.keys()) | set(room.failed_exits.keys())
        unexplored = list(cardinals - known)

        return f"""
*** SPATIAL INTELLIGENCE (Room ID: {current_id}) ***
📍 CURRENT LOCATION: {room.name} (Visited {room.visited_count} times)
✅ VERIFIED PATHS: {', '.join(verified) if verified else 'None'}
🤔 POSSIBLE PATHS (Unverified): {', '.join(inferred) if inferred else 'None'}
⛔ BLOCKED / DEAD ENDS: {', '.join(blocked) if blocked else 'None'}
🔍 UNEXPLORED DIRECTIONS: {', '.join(unexplored)}
"""

    def _extract_direction(self, action: str) -> Optional[str]:
        """Extract the canonical direction from an action string.

        E.g. 'go north' -> 'north', 'n' -> 'north'. Returns None when the
        action is not a movement.
        """
        for token in action.lower().split():
            if token in self._DIRECTION_ALIASES:
                return self._DIRECTION_ALIASES[token]
        return None

    def save(self) -> None:
        """Persist the graph to ``self.map_file``."""
        self.graph.save(self.map_file)

    def load(self) -> None:
        """Load the graph from ``self.map_file`` (no-op if absent)."""
        self.graph.load(self.map_file)
|
test_jericho.py
ADDED
|
@@ -0,0 +1,122 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import sys
|
| 2 |
+
from pathlib import Path
|
| 3 |
+
from jericho.template_action_generator import TemplateActionGenerator
|
| 4 |
+
|
| 5 |
+
# Remonte d'un niveau pour trouver le dossier 'games'
|
| 6 |
+
root = Path(__file__).resolve().parent.parent
|
| 7 |
+
sys.path.append(str(root))
|
| 8 |
+
|
| 9 |
+
from games.zork_env import TextAdventureEnv
|
| 10 |
+
|
| 11 |
+
# def run_crash_test():
|
| 12 |
+
# try:
|
| 13 |
+
# # 1. Initialisation
|
| 14 |
+
# print("🚀 Initialisation de l'environnement...")
|
| 15 |
+
# env_wrapper = TextAdventureEnv("lostpig")
|
| 16 |
+
# env_wrapper.reset()
|
| 17 |
+
# j_env = env_wrapper.env
|
| 18 |
+
|
| 19 |
+
# # 2. Test du Dictionnaire avec types de mots
|
| 20 |
+
# words = j_env.get_dictionary()
|
| 21 |
+
# print(f"\n--- ANALYSE DU DICTIONNAIRE ({len(words)} mots) ---")
|
| 22 |
+
# # On affiche quelques verbes et directions pour vérifier les types
|
| 23 |
+
# count = 0
|
| 24 |
+
# for w in words:
|
| 25 |
+
# if w.is_verb or w.is_dir:
|
| 26 |
+
# type_str = "VERBE" if w.is_verb else "DIRECTION"
|
| 27 |
+
# print(f"Mot: {w.word.ljust(12)} | Type: {type_str}")
|
| 28 |
+
# count += 1
|
| 29 |
+
# if count > 15: break
|
| 30 |
+
|
| 31 |
+
# # 3. Test des PROCHAINES LOCATIONS (Next Locations)
|
| 32 |
+
# print("\n--- TEST DES DIRECTIONS (Next Locations) ---")
|
| 33 |
+
# location = j_env.get_player_location()
|
| 34 |
+
# print(f"📍 Lieu actuel : {location.name} (ID: {location.num})")
|
| 35 |
+
|
| 36 |
+
# # Les directions standards
|
| 37 |
+
# directions = ["north", "south", "east", "west", "up", "down", "in", "out"]
|
| 38 |
+
# for d in directions:
|
| 39 |
+
# # On vérifie si le mot existe dans le dictionnaire avant de tester
|
| 40 |
+
# print(f"Chemin potentiel vers {d}...")
|
| 41 |
+
|
| 42 |
+
# # 4. Test du TEMPLATE ACTION GENERATOR (Le "God Mode")
|
| 43 |
+
# print("\n--- GENERATION D'ACTIONS PAR TEMPLATE ---")
|
| 44 |
+
# # On récupère les objets visibles dans la pièce
|
| 45 |
+
# world_objs = j_env.get_world_objects()
|
| 46 |
+
# inventory = j_env.get_inventory()
|
| 47 |
+
|
| 48 |
+
# # On crée une liste de noms d'objets interactifs (Heuristique : ceux dans la pièce ou l'inventaire)
|
| 49 |
+
# interactive_names = []
|
| 50 |
+
# for obj in world_objs:
|
| 51 |
+
# if obj.parent == location.num:
|
| 52 |
+
# interactive_names.append(obj.name)
|
| 53 |
+
# for item in inventory:
|
| 54 |
+
# interactive_names.append(item.name)
|
| 55 |
+
|
| 56 |
+
# interactive_names =["pig"]
|
| 57 |
+
|
| 58 |
+
# print(f"📦 Objets interactifs détectés : {interactive_names}")
|
| 59 |
+
|
| 60 |
+
# # On initialise le générateur avec les bindings du jeu
|
| 61 |
+
# act_gen = TemplateActionGenerator(j_env.bindings)
|
| 62 |
+
|
| 63 |
+
# # On génère toutes les combinaisons possibles !
|
| 64 |
+
# # C'est ici que tes 1500 verbes se transforment en actions concrètes
|
| 65 |
+
# all_actions = act_gen.generate_actions(interactive_names)
|
| 66 |
+
|
| 67 |
+
# print(f"✨ {len(all_actions)} actions possibles générées via templates.")
|
| 68 |
+
# print(f"Exemple d'actions : {all_actions[40:100]}")
|
| 69 |
+
|
| 70 |
+
# # 5. Scan des objets pour le Radar (ton code précédent)
|
| 71 |
+
# print("\n--- SCAN DES OBJETS ---")
|
| 72 |
+
# for obj in world_objs[:20]: # Limité pour la lisibilité
|
| 73 |
+
# print(f"🎯 OBJ : {obj.name} ")
|
| 74 |
+
|
| 75 |
+
# except Exception as e:
|
| 76 |
+
# import traceback
|
| 77 |
+
# print(f"❌ LE TEST A ÉCHOUÉ : {e}")
|
| 78 |
+
# traceback.print_exc()
|
| 79 |
+
|
| 80 |
+
# if __name__ == "__main__":
|
| 81 |
+
# run_crash_test()
|
| 82 |
+
|
| 83 |
+
|
| 84 |
+
# ... (tes imports restent les mêmes)
|
| 85 |
+
|
| 86 |
+
def run_crash_test():
    """Smoke-test the Jericho environment.

    Boots the 'lostpig' game, extracts the built-in walkthrough, and
    sanity-checks that the first walkthrough command actually executes.
    Any failure is caught, printed, and dumped with a full traceback.
    """
    try:
        # 1. Initialization
        print("🚀 Initialisation de l'environnement...")
        env_wrapper = TextAdventureEnv("lostpig")
        env_wrapper.reset()
        j_env = env_wrapper.env

        # --- [ADDED SECTION: WALKTHROUGH] ---
        # 6. Retrieve the walkthrough (step-by-step solution)
        print("\n--- EXTRACTION DU WALKTHROUGH (SOLUCE) ---")
        walkthrough = j_env.get_walkthrough()

        if walkthrough:
            print(f"📖 Solution trouvée : {len(walkthrough)} étapes cruciales.")
            # Fix: the label says "first steps", so print only a prefix
            # instead of dumping the entire (possibly very long) solution.
            print(f"Premières étapes : {walkthrough[:10]}")

            # Optional: run the first walkthrough command as a sanity check.
            first_cmd = walkthrough[0]
            obs, reward, done, info = j_env.step(first_cmd)
            print(f"✅ Test commande '{first_cmd}' -> Résultat : {obs[:100]}...")
        else:
            print("⚠️ Aucun walkthrough disponible pour ce jeu dans Jericho.")
        # ----------------------------------------

        # 2. Dictionary test: verifies the Jericho API still responds.
        words = j_env.get_dictionary()
        # ... rest of your code

    except Exception as e:
        # Broad catch is deliberate for a crash test: report the failure and
        # the traceback instead of letting the script die silently.
        import traceback
        print(f"❌ LE TEST A ÉCHOUÉ : {e}")
        traceback.print_exc()

if __name__ == "__main__":
    run_crash_test()
|