david-ar
/

20q

@@ -270,23 +270,40 @@ class TwentyQForCausalLM(PreTrainedModel, GenerationMixin):
         return best_q
-    def play(self):
-        """Interactive CLI mode — play 20Q in the terminal."""
         self._ensure_strings()
-        print("\n  Think of something and I'll try to guess it in 20 questions.\n")
         input("  Press Enter when ready... ")
-        # Use the same generate() path as the pipeline
-        conversation = "Think of something and I'll try to guess it in 20 questions.\n"
         while True:
-            # Generate next response
-            conversation += "[A] "
-            ids = torch.tensor([list(conversation.encode("utf-8"))])
             out = self.generate(ids)
-            response = bytes(out[0, ids.shape[1]:].tolist()).decode("utf-8", errors="replace")
-            conversation += response + "\n"
-            print(f"\n  20Q: {response}")
             if "I win" in response or "stumped" in response:
                 return
@@ -297,7 +314,7 @@ class TwentyQForCausalLM(PreTrainedModel, GenerationMixin):
             else:
                 hint = "(Yes/No/Probably/Doubtful/Maybe/Unknown)"
-            reply = input(f"  You {hint}: ").strip()
             if not reply:
                 return
-            conversation += f"[U] {reply}\n"

         return best_q
+    def play(self, tokenizer=None):
+        """Interactive CLI mode. Pass the tokenizer for proper chat template formatting."""
         self._ensure_strings()
+        if tokenizer is None:
+            # Minimal fallback — construct chat text directly
+            from .tokenization_twentyq import TwentyQTokenizer
+            tokenizer = TwentyQTokenizer()
+            tokenizer.chat_template = (
+                "{% if messages[0]['role'] == 'system' %}{{ messages[0]['content'] }}\n"
+                "{% set loop_messages = messages[1:] %}{% else %}"
+                "{% set loop_messages = messages %}{% endif %}"
+                "{% for message in loop_messages %}"
+                "{% if message['role'] == 'assistant' %}[A] {{ message['content'] }}\n"
+                "{% elif message['role'] == 'user' %}[U] {{ message['content'] }}\n"
+                "{% endif %}{% endfor %}"
+                "{% if add_generation_prompt %}[A] {% endif %}"
+            )
+        messages = [
+            {"role": "system", "content": "Think of something and I'll try to guess it in 20 questions."},
+        ]
+        print("\n  Think of something...\n")
         input("  Press Enter when ready... ")
         while True:
+            text = tokenizer.apply_chat_template(messages, add_generation_prompt=True, tokenize=False)
+            ids = tokenizer.encode(text, return_tensors="pt")
             out = self.generate(ids)
+            response = tokenizer.decode(out[0, ids.shape[1]:].tolist())
+            messages.append({"role": "assistant", "content": response})
+            print(f"\n  > {response}")
             if "I win" in response or "stumped" in response:
                 return
             else:
                 hint = "(Yes/No/Probably/Doubtful/Maybe/Unknown)"
+            reply = input(f"  {hint}: ").strip()
             if not reply:
                 return
+            messages.append({"role": "user", "content": reply})