jmcinern committed on
Commit
cedd2f6
·
verified ·
1 Parent(s): 0241424

Update app.py

Browse files

Set EOS token to <|im_end|> and add debugging output

Files changed (1) hide show
  1. app.py +2 -6
app.py CHANGED
@@ -25,6 +25,7 @@ class ChatBot:
25
  self.tokenizer = AutoTokenizer.from_pretrained(
26
  MODEL_NAME, trust_remote_code=True
27
  )
 
28
  print("Tokenizer loaded!")
29
 
30
  print("Loading model...")
@@ -68,11 +69,6 @@ class ChatBot:
68
  # Tokenize
69
  inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
70
 
71
- # Get stop token IDs for "assistant\n"
72
- stop_token_ids = self.tokenizer.encode(
73
- "assistant\n", add_special_tokens=False
74
- )
75
-
76
  # Generate response
77
  with torch.no_grad():
78
  outputs = self.model.generate(
@@ -82,7 +78,7 @@ class ChatBot:
82
  do_sample=True,
83
  return_dict_in_generate=True,
84
  pad_token_id=self.tokenizer.eos_token_id,
85
- eos_token_id=[self.tokenizer.eos_token_id] + stop_token_ids,
86
  )
87
 
88
  # Decode and clean response, with multiple debugs
 
25
  self.tokenizer = AutoTokenizer.from_pretrained(
26
  MODEL_NAME, trust_remote_code=True
27
  )
28
+ print(self.tokenizer.eos_token_id)
29
  print("Tokenizer loaded!")
30
 
31
  print("Loading model...")
 
69
  # Tokenize
70
  inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
71
 
 
 
 
 
 
72
  # Generate response
73
  with torch.no_grad():
74
  outputs = self.model.generate(
 
78
  do_sample=True,
79
  return_dict_in_generate=True,
80
  pad_token_id=self.tokenizer.eos_token_id,
81
+ eos_token_id=[self.tokenizer.eos_token_id] + "<|im_end|>",
82
  )
83
 
84
  # Decode and clean response, with multiple debugs