Spaces:
Sleeping
Sleeping
File size: 3,125 Bytes
66ad5f3 afbea16 66ad5f3 afbea16 39a1fe7 afbea16 66ad5f3 afbea16 66ad5f3 afbea16 66ad5f3 afbea16 66ad5f3 afbea16 66ad5f3 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 | import gradio as gr
import torch
from datasets import load_dataset
from transformers import (
AutoTokenizer,
AutoModelForSequenceClassification,
TrainingArguments,
Trainer
)
def train_cpu_optimized():
    """Fine-tune TinyBERT on a small IMDB subset with CPU-friendly settings.

    Loads the ``huawei-noah/TinyBERT_General_4L_312D`` checkpoint with a
    two-label classification head, tokenizes 500 shuffled training reviews
    and 200 shuffled test reviews from the ``imdb`` dataset (padded/truncated
    to 64 tokens), trains for 3 epochs with ``Trainer``, and saves the model
    and tokenizer to ``./final_tinybert_model``.

    Returns:
        str: A status message naming the directory the model was saved to.
    """
    # Function-scope import: only needed for the keyword compatibility check.
    import inspect

    model_name = "huawei-noah/TinyBERT_General_4L_312D"
    final_model_dir = "./final_tinybert_model"

    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(
        model_name, num_labels=2
    )

    # Load IMDB dataset
    raw_dataset = load_dataset("imdb")

    def tokenize_function(examples):
        """Tokenize a batch of reviews into fixed-length 64-token inputs."""
        return tokenizer(
            examples["text"],
            padding="max_length",
            truncation=True,
            max_length=64,
        )

    # CPU-friendly dataset sizes; the fixed seed keeps the subsets reproducible.
    train_ds = raw_dataset["train"].shuffle(seed=42).select(range(500))
    eval_ds = raw_dataset["test"].shuffle(seed=42).select(range(200))
    train_dataset = train_ds.map(tokenize_function, batched=True)
    eval_dataset = eval_ds.map(tokenize_function, batched=True)

    # Newer transformers releases renamed `evaluation_strategy` to
    # `eval_strategy` and later dropped the old keyword, so pick whichever
    # name the installed version accepts instead of hard-coding one.
    accepted_params = inspect.signature(TrainingArguments.__init__).parameters
    if "eval_strategy" in accepted_params:
        eval_strategy_key = "eval_strategy"
    else:
        eval_strategy_key = "evaluation_strategy"

    # CPU-optimized training arguments
    training_args = TrainingArguments(
        output_dir="./results",
        num_train_epochs=3,
        per_device_train_batch_size=8,
        per_device_eval_batch_size=16,
        learning_rate=3e-4,
        save_strategy="epoch",
        logging_steps=25,
        dataloader_num_workers=0,
        report_to="none",
        **{eval_strategy_key: "epoch"},
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
    )

    # Start training
    trainer.train()

    # Save the final model together with its tokenizer so the directory can
    # be loaded on its own later.
    trainer.save_model(final_model_dir)
    tokenizer.save_pretrained(final_model_dir)
    return f"✅ Training complete! Model saved to {final_model_dir}"
def test_model(text):
    """Classify the sentiment of ``text`` with the locally saved model.

    Args:
        text: The text to analyze.

    Returns:
        str: ``"Prediction: <label> (Confidence: <score>)"`` on success,
        otherwise a message starting with ``"Error:"`` that describes what
        went wrong (empty input, missing model directory, or a failure while
        loading or running the pipeline).
    """
    # Function-scope import: only this function needs filesystem checks.
    import os

    model_dir = "./final_tinybert_model"

    # An empty box would otherwise be scored as if it were a real review.
    if not text or not text.strip():
        return "Error: no text provided. Please enter some text to analyze."

    # Check for the saved model up front so that only this case is reported
    # as "train first"; other failures keep their real error message.
    if not os.path.isdir(model_dir):
        return f"Error: {model_dir} not found. Please train the model first!"

    try:
        from transformers import pipeline

        pipe = pipeline("sentiment-analysis", model=model_dir)
        # The training tokenization in this file uses max_length=64 with
        # truncation; apply the same limit here so long inputs are cut
        # rather than overflowing the model.
        result = pipe(text, truncation=True, max_length=64)
        return (
            f"Prediction: {result[0]['label']} "
            f"(Confidence: {result[0]['score']:.3f})"
        )
    except Exception as e:
        # UI boundary: surface the failure as text instead of raising.
        return f"Error: {str(e)}"
# Gradio UI: one tab starts training, the other queries the saved model.
with gr.Blocks() as demo:
    gr.Markdown(value="# 🤖 TinyBERT CPU-Optimized Training")
    gr.Markdown(
        value="**Complete ML workflow on CPU Basic - perfectly optimized for your hardware!**"
    )

    # Training tab: a single button whose handler returns a status string.
    with gr.Tab(label="🚀 Train Model"):
        gr.Markdown(
            value="This will train TinyBERT on 500 IMDB samples (15-20 minutes)"
        )
        train_btn = gr.Button(value="Start CPU-Optimized Training")
        train_output = gr.Textbox(lines=5, label="Training Progress")
        train_btn.click(fn=train_cpu_optimized, outputs=train_output)

    # Testing tab: free-text input routed through test_model.
    with gr.Tab(label="🧪 Test Model"):
        gr.Markdown(value="Test your trained sentiment analysis model:")
        test_input = gr.Textbox(
            label="Enter text to analyze",
            placeholder="This movie was fantastic!",
        )
        test_btn = gr.Button(value="Analyze Sentiment")
        test_output = gr.Textbox(label="Prediction Result")
        test_btn.click(fn=test_model, inputs=test_input, outputs=test_output)

# Start the web server for the interface built above.
demo.launch()
|