Spaces:
Build error
Build error
# Fine-tune GPT-2 as a causal language model on a local plain-text file using
# the Hugging Face Trainer, then write the final model and tokenizer to disk.
from transformers import (
    DataCollatorForLanguageModeling,
    GPT2LMHeadModel,
    GPT2Tokenizer,
    TextDataset,
    Trainer,
    TrainingArguments,
)

# Specify the model and tokenizer
model_name = "gpt2"
model = GPT2LMHeadModel.from_pretrained(model_name)
tokenizer = GPT2Tokenizer.from_pretrained(model_name)

# Create a dataset from your text file
# NOTE(review): TextDataset emits a deprecation warning in recent transformers
# releases (pointing to the `datasets` library) -- confirm it is still
# available in the installed version before relying on it.
dataset = TextDataset(
    tokenizer=tokenizer,
    file_path="my_text_file.txt",
    block_size=128,
)

# Create a data collator; mlm=False requests causal-LM batches rather than
# masked-LM ones.
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False,
)

# Specify the training arguments
output_dir = "./results"
training_args = TrainingArguments(
    output_dir=output_dir,
    overwrite_output_dir=True,
    num_train_epochs=3,
    per_device_train_batch_size=1,
    save_steps=10_000,
    save_total_limit=2,
)

# Create a trainer
trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=dataset,
)

# Train the model
trainer.train()

# Persist the final weights explicitly: periodic checkpoints are only written
# every `save_steps` (10_000) steps, so a shorter run would otherwise finish
# without leaving any fine-tuned model on disk.
trainer.save_model(output_dir)
# The tokenizer is not handed to the Trainer above, so save it alongside the
# model to make the output directory loadable on its own.
tokenizer.save_pretrained(output_dir)