# Set-up for fine-tuning a causal language model: load the pretrained
# model and its tokenizer, then load the training corpus from a text file.
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    Trainer,
    TrainingArguments,
)

# Load model and tokenizer.
# Both are loaded from the same checkpoint name so the tokenizer's
# vocabulary matches the model's embedding table.
model_name = "microsoft/Phi-3-mini-128k-instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Load dataset.
# The "text" builder reads the file as plain text; indexing with ["train"]
# selects the split that `data_files` is assigned to when given as a single
# path.
# NOTE(review): the file name suggests the content is already tokenized,
# but it is loaded here as raw text lines -- confirm that the downstream
# tokenization step expects this.
dataset = load_dataset("text", data_files="combined_tokenized_data.txt")["train"]

# ... (rest of your code for tokenization, data collator, training arguments, etc.)