import inspect
import os

import gradio as gr
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    Trainer,
    TrainingArguments,
    pipeline,
)

# Directory the fine-tuned model is written to by training and read from by
# inference.
_MODEL_DIR = "./final_tinybert_model"

# IMDB encodes negative reviews as 0 and positive reviews as 1. Storing the
# names in the model config makes the inference pipeline report readable
# labels instead of the generic LABEL_0 / LABEL_1.
_ID2LABEL = {0: "NEGATIVE", 1: "POSITIVE"}


def _eval_strategy_kwarg():
    """Return the TrainingArguments keyword that sets the evaluation schedule.

    Newer transformers releases renamed ``evaluation_strategy`` to
    ``eval_strategy`` and later dropped the old spelling, so the installed
    version is probed instead of hard-coding either name.
    """
    params = inspect.signature(TrainingArguments.__init__).parameters
    if "eval_strategy" in params:
        return "eval_strategy"
    return "evaluation_strategy"


def train_cpu_optimized():
    """Fine-tune TinyBERT on a small IMDB subset with CPU-friendly settings.

    Loads the 4-layer TinyBERT checkpoint, tokenizes 500 shuffled training
    reviews and 200 shuffled test reviews (fixed seed 42, 64 tokens each),
    trains for 3 epochs, and writes the model and tokenizer to
    ``./final_tinybert_model``.

    Returns:
        A status message for display in the UI.
    """
    model_name = "huawei-noah/TinyBERT_General_4L_312D"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(
        model_name,
        num_labels=len(_ID2LABEL),
        id2label=_ID2LABEL,
        label2id={name: idx for idx, name in _ID2LABEL.items()},
    )

    # Load IMDB dataset
    raw_dataset = load_dataset("imdb")

    def tokenize_function(examples):
        # Fixed-length padding keeps every batch the same shape, so the
        # Trainer's default collator can stack examples directly.
        return tokenizer(
            examples["text"],
            padding="max_length",
            truncation=True,
            max_length=64,
        )

    # CPU-friendly dataset sizes
    train_ds = raw_dataset["train"].shuffle(seed=42).select(range(500))
    eval_ds = raw_dataset["test"].shuffle(seed=42).select(range(200))

    train_dataset = train_ds.map(tokenize_function, batched=True)
    eval_dataset = eval_ds.map(tokenize_function, batched=True)

    # CPU-optimized training arguments
    schedule_kwargs = {_eval_strategy_kwarg(): "epoch"}
    training_args = TrainingArguments(
        output_dir="./results",
        num_train_epochs=3,
        per_device_train_batch_size=8,
        per_device_eval_batch_size=16,
        learning_rate=3e-4,
        save_strategy="epoch",
        logging_steps=25,
        dataloader_num_workers=0,
        report_to="none",
        **schedule_kwargs,
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
    )

    # Start training
    trainer.train()

    # Save the final model together with its tokenizer so the output
    # directory can be loaded directly by a pipeline.
    trainer.save_model(_MODEL_DIR)
    tokenizer.save_pretrained(_MODEL_DIR)

    return f"✅ Training complete! Model saved to {_MODEL_DIR}"


def test_model(text):
    """Classify the sentiment of ``text`` with the locally trained model.

    Args:
        text: Review text entered by the user.

    Returns:
        A formatted prediction string, or a human-readable error message when
        the input is empty, the model has not been trained yet, or inference
        fails.
    """
    if not text or not text.strip():
        return "Please enter some text to analyze."

    # Check for the saved model explicitly so that only a genuinely missing
    # model produces the "train first" hint.
    if not os.path.isdir(_MODEL_DIR):
        return (
            f"Error: no trained model found at {_MODEL_DIR}. "
            "Please train the model first!"
        )

    try:
        classifier = pipeline("sentiment-analysis", model=_MODEL_DIR)
        # Truncate to the model's position-embedding limit; longer inputs
        # would otherwise fail inside the encoder.
        token_limit = classifier.model.config.max_position_embeddings
        top = classifier(text, truncation=True, max_length=token_limit)[0]
    except Exception as exc:  # UI boundary: report the failure, don't crash
        return f"Error: {exc}"

    return f"Prediction: {top['label']} (Confidence: {top['score']:.3f})"
# Create Gradio interface: one tab starts training, the other runs the saved
# model on user-supplied text.
with gr.Blocks() as demo:
    gr.Markdown("# 🤖 TinyBERT CPU-Optimized Training")
    gr.Markdown(
        "**Complete ML workflow on CPU Basic - perfectly optimized for your hardware!**"
    )

    with gr.Tab("🚀 Train Model"):
        gr.Markdown("This will train TinyBERT on 500 IMDB samples (15-20 minutes)")
        train_btn = gr.Button("Start CPU-Optimized Training")
        train_output = gr.Textbox(label="Training Progress", lines=5)
        # The handler takes no inputs; its returned status string fills the
        # textbox once training finishes.
        train_btn.click(train_cpu_optimized, outputs=train_output)

    with gr.Tab("🧪 Test Model"):
        gr.Markdown("Test your trained sentiment analysis model:")
        test_input = gr.Textbox(
            label="Enter text to analyze",
            placeholder="This movie was fantastic!",
        )
        test_btn = gr.Button("Analyze Sentiment")
        test_output = gr.Textbox(label="Prediction Result")
        test_btn.click(test_model, inputs=test_input, outputs=test_output)

# Training runs for many minutes. Routing events through the queue keeps the
# request from hitting the short HTTP timeout that applies to un-queued calls
# on older Gradio releases; on releases where queuing is already the default
# this call is a no-op.
demo.queue()
demo.launch()