Spaces:
Sleeping
Sleeping
Update train.py
Browse files
train.py
CHANGED
|
@@ -12,11 +12,11 @@ model = AutoModelForCausalLM.from_pretrained("distilgpt2")
|
|
| 12 |
# distilgpt2 ships without a dedicated pad token, so reuse EOS for padding.
tokenizer.pad_token = tokenizer.eos_token
# Keep the model config in sync so loss/generation code sees the same pad id.
model.config.pad_token_id = tokenizer.pad_token_id
|
| 14 |
|
| 15 |
-
# Tokenize function
def tokenize_function(example):
    """Tokenize one prompt/completion pair for causal-LM fine-tuning.

    Concatenates ``example["prompt"]`` and ``example["completion"]`` into a
    single sequence, tokenizes with fixed-length padding/truncation, and
    builds ``labels`` from the input ids with padded positions masked.

    Returns the tokenizer's encoding dict with an added ``labels`` key.
    """
    full_text = example["prompt"] + example["completion"]
    tokens = tokenizer(full_text, truncation=True, padding="max_length", max_length=512)
    # Labels mirror input_ids, but padded positions are set to -100 so the
    # HF cross-entropy loss ignores them. Without this, pad == eos here, so
    # the model would be trained to emit endless EOS tokens on every sample.
    tokens["labels"] = [
        tok if mask == 1 else -100
        for tok, mask in zip(tokens["input_ids"], tokens["attention_mask"])
    ]
    return tokens
|
| 21 |
|
| 22 |
# Tokenize
|
|
|
|
| 12 |
# distilgpt2 ships without a dedicated pad token, so reuse EOS for padding.
tokenizer.pad_token = tokenizer.eos_token
# Keep the model config in sync so loss/generation code sees the same pad id.
model.config.pad_token_id = tokenizer.pad_token_id
|
| 14 |
|
| 15 |
+
# Tokenize function
def tokenize_function(example):
    """Tokenize one prompt/completion pair for causal-LM fine-tuning.

    Concatenates ``example["prompt"]`` and ``example["completion"]`` into a
    single sequence, tokenizes with fixed-length padding/truncation, and
    builds ``labels`` from the input ids with padded positions masked.

    Returns the tokenizer's encoding dict with an added ``labels`` key.
    """
    full_text = example["prompt"] + example["completion"]
    tokens = tokenizer(full_text, truncation=True, padding="max_length", max_length=512)
    # Labels mirror input_ids, but padded positions are set to -100 so the
    # HF cross-entropy loss ignores them. Without this, pad == eos here, so
    # the model would be trained to emit endless EOS tokens on every sample.
    tokens["labels"] = [
        tok if mask == 1 else -100
        for tok, mask in zip(tokens["input_ids"], tokens["attention_mask"])
    ]
    return tokens
|
| 21 |
|
| 22 |
# Tokenize
|