# Source metadata (from code viewer): 507 bytes, commit 10b8d6f.
"""Fine-tuning setup: load the Phi-3-mini model, its tokenizer, and the raw
text training corpus. Tokenization, collation, and training follow below."""
from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments
from datasets import load_dataset

# Hub identifier of the base checkpoint to fine-tune.
model_name = "microsoft/Phi-3-mini-128k-instruct"

# The "text" builder yields one example per line of the file; we only need
# the train split. NOTE(review): despite the filename, load_dataset("text")
# reads raw lines — confirm the file is plain text, not pre-tokenized ids.
raw_corpus = load_dataset("text", data_files="combined_tokenized_data.txt")
dataset = raw_corpus["train"]

# Tokenizer and weights are pulled from the Hugging Face hub (cached locally).
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# ... (rest of your code for tokenization, data collator, training arguments, etc.)