"""Gradio app for fine-tuning GPT-2 on a Hugging Face dataset.

NOTE(review): despite the "PPO"/"RL" naming, no reinforcement-learning step
happens here — this is standard supervised causal-LM fine-tuning via
``transformers.Trainer``. Public names are kept for backward compatibility.
"""
import gradio as gr
from transformers import GPT2LMHeadModel, GPT2Tokenizer, TextDataset, DataCollatorForLanguageModeling, Trainer, TrainingArguments
from datasets import load_dataset
import numpy as np
import torch

# Load GPT-2 model and tokenizer once at import time so repeated training
# requests from the UI reuse the same (progressively fine-tuned) weights.
model_name = "gpt2"
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
model = GPT2LMHeadModel.from_pretrained(model_name)

# BUG FIX: GPT-2's tokenizer ships without a pad token, so
# padding='max_length' below would raise a ValueError. Reuse EOS as PAD,
# the conventional choice for causal-LM fine-tuning.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token


def fine_tune_gpt2_with_ppo(dataset_name, epochs, learning_rate):
    """Fine-tune GPT-2 on ``dataset_name`` with a causal-LM objective.

    Args:
        dataset_name: Name of a Hugging Face Hub dataset. It must expose a
            ``train`` split with a ``text`` column (e.g. a wikitext config).
        epochs: Number of training epochs.
        learning_rate: Optimizer learning rate.

    Returns:
        A status string once training finishes.
    """
    # NOTE(review): some hub datasets (wikitext included) also require a
    # config name; load_dataset(name) alone may raise for those — confirm
    # against the datasets the UI is expected to accept.
    dataset = load_dataset(dataset_name)

    def encode(examples):
        # Fixed-length tokenization so the LM collator sees uniform batches.
        return tokenizer(examples['text'], truncation=True,
                         padding='max_length', max_length=128)

    tokenized_dataset = dataset.map(encode, batched=True)
    train_dataset = tokenized_dataset["train"]

    # mlm=False -> labels are the inputs shifted by one (GPT-2's objective),
    # not BERT-style masked-token prediction.
    data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)
    training_args = TrainingArguments(
        output_dir="./results",
        overwrite_output_dir=True,
        num_train_epochs=epochs,
        per_device_train_batch_size=4,
        save_steps=10_000,
        save_total_limit=2,
        learning_rate=learning_rate,
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        data_collator=data_collator,
        train_dataset=train_dataset,
    )
    trainer.train()
    return "Training Completed!"


def train_interface(dataset, epochs, learning_rate):
    """Gradio callback: coerce UI inputs to the right types and train."""
    result = fine_tune_gpt2_with_ppo(dataset, int(epochs), float(learning_rate))
    return result


# BUG FIX: the gr.inputs.* namespace was removed in Gradio 3.x; components
# are now constructed directly (gr.Textbox, gr.Slider). Labels unchanged.
gradio_interface = gr.Interface(
    fn=train_interface,
    inputs=[
        gr.Textbox(label="Dataset (e.g. 'wikitext')"),
        gr.Slider(1, 10, step=1, label="Epochs"),
        gr.Textbox(label="Learning Rate"),
    ],
    outputs="text",
    title="GPT-2 RL Training App",
    description="Fine-tune GPT-2 using PPO via a Gradio interface.",
)

# Guard the launch so importing this module (e.g. for testing) does not
# immediately start a web server.
if __name__ == "__main__":
    gradio_interface.launch()