FireRed Team committed on
Commit
15addad
·
verified ·
1 Parent(s): 034000c

Update fireredasr2s/fireredasr2/tokenizer/llm_tokenizer.py

Browse files
fireredasr2s/fireredasr2/tokenizer/llm_tokenizer.py CHANGED
@@ -75,7 +75,7 @@ class LlmTokenizerWrapper:
75
  max_len_texts = max([len(text) for text in texts])
76
  if tokenizer.padding_side == "right":
77
  texts = [
78
- list(text) + [tokenizer.pad_token_id] * (max_len_texts - len(text))
79
  for text in texts
80
  ]
81
  else:
@@ -83,6 +83,7 @@ class LlmTokenizerWrapper:
83
  [tokenizer.pad_token_id] * (max_len_texts - len(text)) + text
84
  for text in texts
85
  ]
 
86
  input_ids = torch.tensor(texts, dtype=torch.int)
87
 
88
  target_ids = input_ids.clone()
 
75
  max_len_texts = max([len(text) for text in texts])
76
  if tokenizer.padding_side == "right":
77
  texts = [
78
+ text + [tokenizer.pad_token_id] * (max_len_texts - len(text))
79
  for text in texts
80
  ]
81
  else:
 
83
  [tokenizer.pad_token_id] * (max_len_texts - len(text)) + text
84
  for text in texts
85
  ]
86
+ print(texts)
87
  input_ids = torch.tensor(texts, dtype=torch.int)
88
 
89
  target_ids = input_ids.clone()