# Source: Hugging Face Space by jeromekenny (page residue: "jeromekenny's picture")
# Commit message: Update app.py
# Commit: afbea16 (verified)
import gradio as gr
import torch
from datasets import load_dataset
from transformers import (
AutoTokenizer,
AutoModelForSequenceClassification,
TrainingArguments,
Trainer
)
def train_cpu_optimized():
    """Fine-tune TinyBERT on a small IMDB subset with CPU-friendly settings.

    Loads the ``huawei-noah/TinyBERT_General_4L_312D`` checkpoint and the
    IMDB dataset, tokenizes 500 shuffled training reviews and 200 shuffled
    test reviews (padded/truncated to 64 tokens), trains for 3 epochs with
    the Hugging Face ``Trainer`` and writes the final model and tokenizer to
    ``./final_tinybert_model``.

    Returns:
        str: A status message for the UI once training and saving finish.
    """
    # Local import: only needed for the TrainingArguments compatibility shim.
    import inspect

    model_name = "huawei-noah/TinyBERT_General_4L_312D"
    final_model_dir = "./final_tinybert_model"

    # IMDB encodes 0 as negative and 1 as positive. Storing readable names in
    # the model config makes later predictions show NEGATIVE/POSITIVE instead
    # of the generic LABEL_0/LABEL_1.
    id2label = {0: "NEGATIVE", 1: "POSITIVE"}
    label2id = {name: idx for idx, name in id2label.items()}

    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(
        model_name,
        num_labels=2,
        id2label=id2label,
        label2id=label2id,
    )

    # Load IMDB dataset
    raw_dataset = load_dataset("imdb")

    def tokenize_function(examples):
        # Fixed-length padding keeps every batch the same shape, so the
        # Trainer's default collator can stack the examples directly.
        return tokenizer(
            examples["text"],
            padding="max_length",
            truncation=True,
            max_length=64,
        )

    # CPU-friendly dataset sizes: subsample first, then tokenize only the
    # selected rows.
    train_ds = raw_dataset["train"].shuffle(seed=42).select(range(500))
    eval_ds = raw_dataset["test"].shuffle(seed=42).select(range(200))
    train_dataset = train_ds.map(tokenize_function, batched=True)
    eval_dataset = eval_ds.map(tokenize_function, batched=True)

    # Newer transformers releases renamed ``evaluation_strategy`` to
    # ``eval_strategy`` (and later dropped the old keyword). Use whichever
    # name the installed version accepts so construction never raises
    # TypeError on an unknown argument.
    accepted = inspect.signature(TrainingArguments.__init__).parameters
    eval_strategy_kwarg = (
        "eval_strategy" if "eval_strategy" in accepted else "evaluation_strategy"
    )

    # CPU-optimized training arguments
    training_args = TrainingArguments(
        output_dir="./results",
        num_train_epochs=3,
        per_device_train_batch_size=8,
        per_device_eval_batch_size=16,
        learning_rate=3e-4,
        save_strategy="epoch",
        logging_steps=25,
        dataloader_num_workers=0,
        report_to="none",
        **{eval_strategy_kwarg: "epoch"},
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
    )

    # Start training
    trainer.train()

    # Save the final model together with its tokenizer so the directory can
    # be loaded on its own later.
    trainer.save_model(final_model_dir)
    tokenizer.save_pretrained(final_model_dir)

    return f"✅ Training complete! Model saved to {final_model_dir}"
def test_model(text):
    """Classify *text* with the model saved in ``./final_tinybert_model``.

    Args:
        text: Text to analyze, as received from the UI textbox. ``None`` and
            blank strings are rejected with a prompt instead of being sent
            to the model.

    Returns:
        str: ``"Prediction: <label> (Confidence: <score>)"`` on success, or a
        human-readable message when the input is empty, no trained model
        exists yet, or inference fails. Never raises.
    """
    # Local import keeps this callback self-contained.
    from pathlib import Path

    model_dir = "./final_tinybert_model"

    if not text or not text.strip():
        return "Please enter some text to analyze."

    # Check for the model up front so that "train first" is only reported
    # when that is actually the problem.
    if not Path(model_dir).is_dir():
        return (
            f"Error: no trained model found at {model_dir}. "
            "Please train the model first!"
        )

    try:
        from transformers import pipeline

        pipe = pipeline("sentiment-analysis", model=model_dir)
        # Truncate explicitly: without it, inputs longer than the model's
        # 512 position embeddings fail inside the forward pass.
        result = pipe(text, truncation=True, max_length=512)
        top = result[0]
        return f"Prediction: {top['label']} (Confidence: {top['score']:.3f})"
    except Exception as e:
        # UI boundary: surface the real failure instead of crashing the app.
        return f"Error: {e}"


# Despite its name this is a UI callback, not a unit test; stop pytest from
# collecting it when the module is imported by a test run.
test_model.__test__ = False
# Create Gradio interface: one tab kicks off training, the other runs
# sentiment predictions against the saved model.
with gr.Blocks() as demo:
    gr.Markdown("# 🤖 TinyBERT CPU-Optimized Training")
    gr.Markdown("**Complete ML workflow on CPU Basic - perfectly optimized for your hardware!**")

    with gr.Tab("🚀 Train Model"):
        gr.Markdown("This will train TinyBERT on 500 IMDB samples (15-20 minutes)")
        train_btn = gr.Button("Start CPU-Optimized Training")
        train_output = gr.Textbox(label="Training Progress", lines=5)
        # The callback takes no inputs; its returned status string fills the
        # textbox once training has finished.
        train_btn.click(train_cpu_optimized, outputs=train_output)

    with gr.Tab("🧪 Test Model"):
        gr.Markdown("Test your trained sentiment analysis model:")
        test_input = gr.Textbox(label="Enter text to analyze", placeholder="This movie was fantastic!")
        test_btn = gr.Button("Analyze Sentiment")
        test_output = gr.Textbox(label="Prediction Result")
        test_btn.click(test_model, inputs=test_input, outputs=test_output)

# Launched at import time so running the file directly starts the app.
demo.launch()