# faq_finetune / app.py — fine-tune GPT-2 on an English e-commerce FAQ dataset
# using Hugging Face Transformers.
# (Hugging Face Space header: iamyourai's picture — "Update app.py",
# commit fd97e42, verified.)
from datasets import load_dataset
from transformers import GPT2LMHeadModel, GPT2Tokenizer

# Load the pre-trained GPT-2 model and its matching tokenizer.
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2LMHeadModel.from_pretrained('gpt2')

# BUG FIX: GPT-2 ships without a pad token, so tokenizing with
# `padding=True` (as tokenize_function does below) raises an error.
# Reusing the EOS token as the pad token is the standard workaround
# for causal-LM fine-tuning.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
    model.config.pad_token_id = tokenizer.eos_token_id

# BUG FIX: the script later calls `dataset.map(...)`, but the original
# code bound the loaded DatasetDict to `examples`, causing a NameError.
# Bind both names so every existing reference keeps working.
dataset = load_dataset("Andyrasika/Ecommerce_FAQ")
examples = dataset  # backward-compatible alias for the original name
def tokenize_function(examples):
    """Tokenize one batch of FAQ rows into "Q: ... A: ..." training texts.

    Parameters
    ----------
    examples : dict of lists
        A batched row slice from ``datasets.Dataset.map(batched=True)``;
        must contain parallel 'question' and 'answer' columns.

    Returns
    -------
    dict
        The module-level GPT-2 tokenizer's output ('input_ids',
        'attention_mask', ...), truncated and padded to the longest
        item in the batch.
    """
    # Concatenate each question and answer into a single text field so the
    # causal LM learns to produce the answer conditioned on the question.
    text_pairs = [f"Q: {q} A: {a}" for q, a in zip(examples['question'], examples['answer'])]
    return tokenizer(text_pairs, truncation=True, padding=True)
# BUG FIX: the original line referenced the undefined name `dataset`;
# `examples` is what load_dataset() was actually assigned to above.
tokenized_dataset = examples.map(tokenize_function, batched=True)

from transformers import Trainer, TrainingArguments, DataCollatorForLanguageModeling

# GPT-2 has no pad token by default; batched padding/collation needs one.
# Guarded so it is idempotent if already set during setup.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# BUG FIX: a causal-LM data collator (mlm=False) copies input_ids into
# `labels` for each batch. Without it the Trainer receives no labels and
# therefore computes no loss, so no actual fine-tuning happens.
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

training_args = TrainingArguments(
    output_dir='./results',          # Output directory
    num_train_epochs=3,              # Number of epochs
    per_device_train_batch_size=4,   # Batch size per device
    save_steps=10_000,               # Save checkpoint every 10,000 steps
    save_total_limit=2,              # Only last 2 checkpoints are kept
)

trainer = Trainer(
    model=model,                     # The pre-trained GPT-2 model
    args=training_args,              # Training arguments
    # BUG FIX: load_dataset() returns a DatasetDict; the Trainer needs the
    # actual split, not the dict of splits.
    train_dataset=tokenized_dataset["train"],
    data_collator=data_collator,     # Supplies labels for the LM loss
)

trainer.train()

# Persist the fine-tuned weights and the tokenizer together so the model
# can be reloaded later with from_pretrained('./faqmodel').
model.save_pretrained('./faqmodel')
tokenizer.save_pretrained('./faqmodel')