FireRed Team committed on
Commit
034000c
·
verified ·
1 Parent(s): 970908d

Update fireredasr2s/fireredasr2/tokenizer/llm_tokenizer.py

Browse files
fireredasr2s/fireredasr2/tokenizer/llm_tokenizer.py CHANGED
@@ -66,7 +66,6 @@ class LlmTokenizerWrapper:
66
  tokenize=True,
67
  chat_template=TEMPLATE,
68
  add_generation_prompt=False,
69
- padding="longest",
70
  max_length=max_len,
71
  truncation=True,
72
  )
@@ -76,7 +75,7 @@ class LlmTokenizerWrapper:
76
  max_len_texts = max([len(text) for text in texts])
77
  if tokenizer.padding_side == "right":
78
  texts = [
79
- text + [tokenizer.pad_token_id] * (max_len_texts - len(text))
80
  for text in texts
81
  ]
82
  else:
 
66
  tokenize=True,
67
  chat_template=TEMPLATE,
68
  add_generation_prompt=False,
 
69
  max_length=max_len,
70
  truncation=True,
71
  )
 
75
  max_len_texts = max([len(text) for text in texts])
76
  if tokenizer.padding_side == "right":
77
  texts = [
78
+ list(text) + [tokenizer.pad_token_id] * (max_len_texts - len(text))
79
  for text in texts
80
  ]
81
  else: