Spaces:
Runtime error
Runtime error
File size: 1,542 Bytes
246f8d0 4a99d8f 246f8d0 4a99d8f 246f8d0 4a99d8f 246f8d0 4a99d8f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 |
import gradio as gr
from datasets import load_dataset
from transformers import (
    AutoModelForSeq2SeqLM,
    AutoTokenizer,
    DataCollatorForSeq2Seq,
    Trainer,
    TrainingArguments,
)
# Load your dataset
# "vidu8/ch01" is the identifier handed to load_dataset; only its "train" split
# is used below, and each row must provide "input_text" and "target_text".
# NOTE(review): presumably a Hugging Face Hub dataset repo id - confirm it is accessible.
dataset = load_dataset("vidu8/ch01")
# Load tokenizer and model
# Only the tokenizer is created here; the model weights for the same
# checkpoint name are loaded further down, after the dataset is tokenized.
model_name = "t5-small" # lightweight and fast
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Prepare dataset
def preprocess(example):
    """Tokenize one dataset row into model inputs plus labels.

    Args:
        example: A single row providing ``"input_text"`` and
            ``"target_text"`` string fields.

    Returns:
        The tokenizer output for the ``"chat: "``-prefixed input text, with
        an added ``"labels"`` entry holding the token ids of the target text.
    """
    source_text = "chat: " + example["input_text"]
    target_text = example["target_text"]
    # Both sides are truncated to at most 128 tokens; no padding is applied here.
    encoded = tokenizer(source_text, truncation=True, max_length=128)
    encoded["labels"] = tokenizer(
        target_text, truncation=True, max_length=128
    )["input_ids"]
    return encoded
# Tokenize the "train" split; batched=False means preprocess is called with
# one row at a time rather than with a dict of column lists.
train_dataset = dataset["train"].map(preprocess, batched=False)
# Load model
# Pretrained seq2seq weights for model_name; this same object is fine-tuned by
# the Trainer below and later used by chat() for generation.
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
# Set training arguments
# Checkpoints go to ./results every 100 steps, keeping only the newest one.
# No evaluation is configured: the old `evaluation_strategy="no"` argument is
# intentionally omitted because "no" is already the default and the keyword
# was renamed to `eval_strategy`, so newer transformers releases reject it.
training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=3,
    per_device_train_batch_size=4,
    logging_steps=10,
    save_steps=100,
    save_total_limit=1,
)
# Define Trainer
# preprocess() tokenizes without padding, so examples in a batch have different
# lengths. DataCollatorForSeq2Seq pads inputs per batch and pads labels with
# -100 so padded positions are ignored by the loss; without it the default
# collator cannot stack the ragged lists into tensors.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    data_collator=DataCollatorForSeq2Seq(tokenizer, model=model),
)
# Train
trainer.train()
# Training leaves the model in train mode (dropout active); switch to eval
# mode so generation in the chat handler is deterministic.
model.eval()
# Gradio interface
def chat(input_text):
    """Generate the fine-tuned model's reply to one user message.

    Args:
        input_text: The user's message as a plain string.

    Returns:
        The decoded model output with special tokens removed.
    """
    # Use the same "chat: " prefix that preprocess() puts on training inputs.
    encoded = tokenizer("chat: " + input_text, return_tensors="pt")
    # The Trainer may have moved the model to an accelerator; generate()
    # requires the input tensors to live on the same device as the weights.
    encoded = encoded.to(model.device)
    outputs = model.generate(**encoded, max_length=50)
    # generate() returns a batch; there is exactly one sequence here.
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
# Wrap chat() in a single text-in / text-out Gradio UI and start serving it.
iface = gr.Interface(
    fn=chat,
    inputs="text",
    outputs="text",
    title="Simple Chatbot",
)
iface.launch()
|