david-ar committed on
Commit
a12fc76
·
verified ·
1 Parent(s): a0b41bb

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. modeling_twentyq.py +30 -13
modeling_twentyq.py CHANGED
@@ -270,23 +270,40 @@ class TwentyQForCausalLM(PreTrainedModel, GenerationMixin):
270
 
271
  return best_q
272
 
273
- def play(self):
274
- """Interactive CLI mode play 20Q in the terminal."""
275
  self._ensure_strings()
276
- print("\n Think of something and I'll try to guess it in 20 questions.\n")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
277
  input(" Press Enter when ready... ")
278
 
279
- # Use the same generate() path as the pipeline
280
- conversation = "Think of something and I'll try to guess it in 20 questions.\n"
281
  while True:
282
- # Generate next response
283
- conversation += "[A] "
284
- ids = torch.tensor([list(conversation.encode("utf-8"))])
285
  out = self.generate(ids)
286
- response = bytes(out[0, ids.shape[1]:].tolist()).decode("utf-8", errors="replace")
287
- conversation += response + "\n"
 
 
288
 
289
- print(f"\n 20Q: {response}")
290
  if "I win" in response or "stumped" in response:
291
  return
292
 
@@ -297,7 +314,7 @@ class TwentyQForCausalLM(PreTrainedModel, GenerationMixin):
297
  else:
298
  hint = "(Yes/No/Probably/Doubtful/Maybe/Unknown)"
299
 
300
- reply = input(f" You {hint}: ").strip()
301
  if not reply:
302
  return
303
- conversation += f"[U] {reply}\n"
 
270
 
271
  return best_q
272
 
273
+ def play(self, tokenizer=None):
274
+ """Interactive CLI mode. Pass the tokenizer for proper chat template formatting."""
275
  self._ensure_strings()
276
+
277
+ if tokenizer is None:
278
+ # Minimal fallback — construct chat text directly
279
+ from .tokenization_twentyq import TwentyQTokenizer
280
+ tokenizer = TwentyQTokenizer()
281
+ tokenizer.chat_template = (
282
+ "{% if messages[0]['role'] == 'system' %}{{ messages[0]['content'] }}\n"
283
+ "{% set loop_messages = messages[1:] %}{% else %}"
284
+ "{% set loop_messages = messages %}{% endif %}"
285
+ "{% for message in loop_messages %}"
286
+ "{% if message['role'] == 'assistant' %}[A] {{ message['content'] }}\n"
287
+ "{% elif message['role'] == 'user' %}[U] {{ message['content'] }}\n"
288
+ "{% endif %}{% endfor %}"
289
+ "{% if add_generation_prompt %}[A] {% endif %}"
290
+ )
291
+
292
+ messages = [
293
+ {"role": "system", "content": "Think of something and I'll try to guess it in 20 questions."},
294
+ ]
295
+ print("\n Think of something...\n")
296
  input(" Press Enter when ready... ")
297
 
 
 
298
  while True:
299
+ text = tokenizer.apply_chat_template(messages, add_generation_prompt=True, tokenize=False)
300
+ ids = tokenizer.encode(text, return_tensors="pt")
 
301
  out = self.generate(ids)
302
+ response = tokenizer.decode(out[0, ids.shape[1]:].tolist())
303
+
304
+ messages.append({"role": "assistant", "content": response})
305
+ print(f"\n > {response}")
306
 
 
307
  if "I win" in response or "stumped" in response:
308
  return
309
 
 
314
  else:
315
  hint = "(Yes/No/Probably/Doubtful/Maybe/Unknown)"
316
 
317
+ reply = input(f" {hint}: ").strip()
318
  if not reply:
319
  return
320
+ messages.append({"role": "user", "content": reply})