# File size: 2,091 Bytes (revision 4f117ff)
import gradio as gr
from transformers import GPT2LMHeadModel, GPT2Tokenizer, TextDataset, DataCollatorForLanguageModeling, Trainer, TrainingArguments
from datasets import load_dataset
import numpy as np
import torch
# Checkpoint identifier shared by the language model and its tokenizer.
model_name = "gpt2"
# Both objects are created once at import time and reused by the training code.
model = GPT2LMHeadModel.from_pretrained(model_name)
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
# Fine-tuning entry point (simplified: plain causal-LM training, no PPO loop)
def fine_tune_gpt2_with_ppo(dataset_name, epochs, learning_rate):
    """Fine-tune the module-level GPT-2 model on a Hugging Face dataset.

    Despite the name, no PPO or reward model is involved: the function runs
    the standard ``Trainer`` loop with a causal language-modelling collator.
    The name is kept so existing callers continue to work.

    Args:
        dataset_name: Identifier passed to ``load_dataset``. The loaded
            dataset must provide a ``"train"`` split with a ``"text"`` column.
        epochs: Number of training epochs.
        learning_rate: Learning rate handed to ``TrainingArguments``.

    Returns:
        The string ``"Training Completed!"`` once training has finished.

    Note:
        The shared module-level ``model`` is trained in place, so repeated
        calls continue from the weights left by the previous call.
    """
    # GPT-2 ships without a padding token, so tokenising with
    # padding='max_length' would raise a ValueError. Reusing the EOS token
    # is the conventional workaround for causal language models.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    # Load the dataset
    dataset = load_dataset(dataset_name)

    # Prepare dataset for GPT-2 training: every example is truncated or
    # padded to exactly 128 tokens.
    def encode(examples):
        return tokenizer(examples['text'], truncation=True, padding='max_length', max_length=128)

    tokenized_dataset = dataset.map(encode, batched=True)
    train_dataset = tokenized_dataset["train"]

    # Prepare data collator and training arguments.
    # mlm=False selects causal (next-token) language modelling.
    data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)
    training_args = TrainingArguments(
        output_dir="./results",
        overwrite_output_dir=True,
        num_train_epochs=epochs,
        per_device_train_batch_size=4,
        save_steps=10_000,
        save_total_limit=2,
        learning_rate=learning_rate,
    )

    # Trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        data_collator=data_collator,
        train_dataset=train_dataset,
    )

    # Train model
    trainer.train()
    return "Training Completed!"
# Callback wired into the Gradio UI
def train_interface(dataset, epochs, learning_rate):
    """Convert the raw UI values and start a fine-tuning run.

    ``epochs`` is converted with ``int`` and ``learning_rate`` with ``float``
    before being forwarded; the status string returned by
    ``fine_tune_gpt2_with_ppo`` is passed back unchanged.
    """
    num_epochs = int(epochs)
    lr_value = float(learning_rate)
    return fine_tune_gpt2_with_ppo(dataset, num_epochs, lr_value)
# Gradio App
# Components come from the top-level gr.Textbox / gr.Slider classes: the
# legacy gr.inputs namespace was deprecated in Gradio 3 and removed in
# Gradio 4, where gr.inputs.Textbox raises AttributeError.
gradio_interface = gr.Interface(
    fn=train_interface,
    inputs=[
        gr.Textbox(label="Dataset (e.g. 'wikitext')"),
        gr.Slider(minimum=1, maximum=10, step=1, label="Epochs"),
        # Free-text field; train_interface converts the value with float().
        gr.Textbox(label="Learning Rate"),
    ],
    outputs="text",
    title="GPT-2 RL Training App",
    description="Fine-tune GPT-2 using PPO via a Gradio interface.",
)
# Launch the app
gradio_interface.launch()