"""Fine-tune GPT-2 on a custom CSV-derived text corpus with the HF Trainer.

Fixes over the original draft:
- Comment lines were missing `#` markers (SyntaxError); now real comments in English.
- Two statements shared one line (SyntaxError); split.
- `train_dataset` was assigned a bare URL (SyntaxError, and a URL is not a
  Dataset); the CSV is now downloaded, flattened to text, and tokenized into
  a proper ``torch.utils.data.Dataset``.
- GPT-2 ships without a pad token; EOS is reused so batched padding works.
"""
import pandas as pd
import torch
from transformers import GPT2Tokenizer, GPT2LMHeadModel, Trainer, TrainingArguments

# Load the pretrained model and tokenizer.
model = GPT2LMHeadModel.from_pretrained('gpt2')
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
# GPT-2 defines no pad token; reuse EOS so padding='max_length' below is valid.
tokenizer.pad_token = tokenizer.eos_token

# Your training data (CSV). NOTE(review): brfee.csv is an HK business-
# registration fee table, not natural-language text — confirm this is really
# the intended training corpus.
TRAIN_DATA_URL = 'https://www.ird.gov.hk/datagovhk/brfee.csv'


class TextDataset(torch.utils.data.Dataset):
    """Tokenized text rows for causal-LM training (labels = input_ids)."""

    def __init__(self, texts, tokenizer, max_length=128):
        # Tokenize everything up front; fine for small corpora.
        self.encodings = tokenizer(
            list(texts),
            truncation=True,
            max_length=max_length,
            padding='max_length',
            return_tensors='pt',
        )

    def __len__(self):
        return self.encodings['input_ids'].size(0)

    def __getitem__(self, idx):
        item = {key: val[idx] for key, val in self.encodings.items()}
        # Causal LM: the model shifts labels internally, so labels == inputs.
        item['labels'] = item['input_ids'].clone()
        return item


# Prepare the dataset: read the CSV and flatten each row into one text string.
df = pd.read_csv(TRAIN_DATA_URL)
texts = df.astype(str).agg(' '.join, axis=1)
train_dataset = TextDataset(texts, tokenizer)

# Training hyperparameters.
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=4,
    save_steps=10_000,
    save_total_limit=2,
)

# Train the model.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
)
trainer.train()