Charlie81 committed on
Commit
52bdc02
·
1 Parent(s): d6ffab2
Files changed (1) hide show
  1. scripts/train.py +6 -6
scripts/train.py CHANGED
@@ -52,12 +52,12 @@ def main():
52
  formatted += f"{role.capitalize()}: {content}\n"
53
  texts.append(formatted)
54
 
55
- return tokenizer(
56
- texts,
57
- truncation=True,
58
- max_length=4096,
59
- padding="max_length"
60
- )
61
 
62
 
63
  tokenized_dataset = dataset.map(
 
52
  formatted += f"{role.capitalize()}: {content}\n"
53
  texts.append(formatted)
54
 
55
+ return tokenizer(
56
+ texts,
57
+ truncation=True,
58
+ max_length=4096,
59
+ padding="max_length"
60
+ )
61
 
62
 
63
  tokenized_dataset = dataset.map(