Percy3822 committed on
Commit
398ce43
·
verified ·
1 Parent(s): de3a096

Update train.py

Browse files
Files changed (1) hide show
  1. train.py +2 -2
train.py CHANGED
@@ -12,11 +12,11 @@ model = AutoModelForCausalLM.from_pretrained("distilgpt2")
12
  tokenizer.pad_token = tokenizer.eos_token
13
  model.config.pad_token_id = tokenizer.pad_token_id
14
 
15
- # Tokenize function: provide input_ids + labels (needed for loss)
16
  def tokenize_function(example):
17
  full_text = example["prompt"] + example["completion"]
18
  tokens = tokenizer(full_text, truncation=True, padding="max_length", max_length=512)
19
- tokens["labels"] = tokens["input_ids"].copy() # 👈 labels = input_ids for language modeling
20
  return tokens
21
 
22
  # Tokenize
 
12
  tokenizer.pad_token = tokenizer.eos_token
13
  model.config.pad_token_id = tokenizer.pad_token_id
14
 
15
+ # Tokenize function
16
  def tokenize_function(example):
17
  full_text = example["prompt"] + example["completion"]
18
  tokens = tokenizer(full_text, truncation=True, padding="max_length", max_length=512)
19
+ tokens["labels"] = tokens["input_ids"].copy()
20
  return tokens
21
 
22
  # Tokenize