import gradio as gr
from transformers import (
    GPT2LMHeadModel,
    GPT2Tokenizer,
    DataCollatorForLanguageModeling,
    Trainer,
    TrainingArguments,
)
from datasets import load_dataset

# Load the base GPT-2 model and tokenizer once at startup.
model_name = "gpt2"
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
model = GPT2LMHeadModel.from_pretrained(model_name)

# GPT-2 ships without a pad token; reuse the EOS token so the
# padding in encode() below does not raise an error.
tokenizer.pad_token = tokenizer.eos_token

def fine_tune_gpt2(dataset_name, epochs, learning_rate):
    """Supervised causal-LM fine-tuning of GPT-2 with the Hugging Face Trainer.

    Note: this is plain language-model fine-tuning, not PPO; a hedged PPO
    sketch using the trl library follows below.
    """
    # Datasets with multiple configs (e.g. 'wikitext') need one specified,
    # as in load_dataset("wikitext", "wikitext-2-raw-v1").
    dataset = load_dataset(dataset_name)

    def encode(examples):
        return tokenizer(examples["text"], truncation=True, padding="max_length", max_length=128)

    tokenized_dataset = dataset.map(encode, batched=True)
    train_dataset = tokenized_dataset["train"]

    # mlm=False yields causal-LM batches: labels are a copy of input_ids,
    # which the model shifts internally for next-token prediction.
    data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)
    training_args = TrainingArguments(
        output_dir="./results",
        overwrite_output_dir=True,
        num_train_epochs=epochs,
        per_device_train_batch_size=4,
        save_steps=10_000,
        save_total_limit=2,
        learning_rate=learning_rate,
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        data_collator=data_collator,
        train_dataset=train_dataset,
    )
    trainer.train()

    return "Training Completed!"

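# --- PPO variant (sketch) ---------------------------------------------------
# The routine above is standard supervised fine-tuning. Actual PPO needs a
# policy with a value head, a frozen reference model, and a reward signal.
# Below is a minimal sketch using the Hugging Face trl library, assuming the
# classic PPOTrainer API from the trl 0.x quickstart (later releases changed
# it). `prompts` and `reward_fn` are hypothetical placeholders the caller
# supplies; treat this as an illustration, not a drop-in implementation.
def fine_tune_gpt2_with_ppo(prompts, reward_fn, learning_rate=1.41e-5):
    # Imported lazily so the app above still runs if trl is not installed.
    import torch
    from trl import PPOConfig, PPOTrainer, AutoModelForCausalLMWithValueHead

    # Trainable policy (with value head) plus a frozen reference copy used
    # for the KL penalty that keeps the policy close to the original model.
    policy = AutoModelForCausalLMWithValueHead.from_pretrained(model_name)
    ref_model = AutoModelForCausalLMWithValueHead.from_pretrained(model_name)
    config = PPOConfig(learning_rate=learning_rate, batch_size=1, mini_batch_size=1)
    ppo_trainer = PPOTrainer(config, policy, ref_model, tokenizer)

    for prompt in prompts:
        query = tokenizer.encode(prompt, return_tensors="pt")[0]
        # Sample a completion from the current policy.
        response = ppo_trainer.generate(query, return_prompt=False, max_new_tokens=32)[0]
        # Score the decoded text with the caller-supplied reward function.
        reward = [torch.tensor(float(reward_fn(tokenizer.decode(response))))]
        # One PPO optimization step on this (query, response, reward) triple.
        ppo_trainer.step([query], [response], reward)
    return policy

# Hypothetical usage: reward longer completions.
# fine_tune_gpt2_with_ppo(["Once upon a time"], lambda text: len(text) / 100)
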
def train_interface(dataset, epochs, learning_rate):
    # Gradio passes widget values; cast to the types the trainer expects.
    result = fine_tune_gpt2(dataset, int(epochs), float(learning_rate))
    return result

gradio_interface = gr.Interface(
    fn=train_interface,
    inputs=[
        gr.Textbox(label="Dataset (e.g. 'wikitext')"),
        gr.Slider(1, 10, step=1, label="Epochs"),
        gr.Textbox(label="Learning Rate"),
    ],
    outputs="text",
    title="GPT-2 Fine-Tuning App",
    description="Fine-tune GPT-2 on a Hugging Face dataset from a Gradio interface.",
)
gradio_interface.launch()
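# Assumption: launch() defaults suffice for local use; launch(share=True)
# would additionally create a temporary public link for remote access.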