# Hugging Face Space: IMDb sentiment-analysis demo (BERT fine-tuning + Gradio UI).
# NOTE(review): the hosted Space reported "Runtime error" at startup.
# Import Libraries
# NOTE: `AdamW` used to be imported from `transformers`, but it was deprecated
# and then removed from that package, so `from transformers import AdamW`
# raises ImportError on current releases. The drop-in replacement is
# torch.optim.AdamW, imported here under the same name.
import random

import numpy as np
import torch
from torch.optim import AdamW
from torch.utils.data import DataLoader
from transformers import BertTokenizer, BertForSequenceClassification, pipeline
from transformers import get_scheduler
from datasets import load_dataset
from sklearn.metrics import accuracy_score, classification_report
import gradio as gr
# Set random seeds for reproducibility across Python, NumPy and PyTorch.
# The original only seeded the CPU RNG; since the model is moved to CUDA when
# available, the CUDA generators must be seeded too (no-op on CPU-only hosts).
SEED = 42
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)
random.seed(SEED)
np.random.seed(SEED)
# Fetch the IMDb review dataset and the tokenizer that matches the
# pretrained checkpoint used for the classifier below.
dataset = load_dataset('imdb')
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
# Tokenization Function
def tokenize_function(batch):
    """Tokenize a batch of raw reviews, padding/truncating each to 128 tokens."""
    return tokenizer(
        batch['text'],
        padding="max_length",
        truncation=True,
        max_length=128,
    )
# Tokenize the corpus, drop the raw text to save memory, and rename the
# target column to `labels` (the field name Transformers models expect);
# then expose everything as PyTorch tensors.
tokenized_datasets = (
    dataset.map(tokenize_function, batched=True)
    .remove_columns(['text'])
    .rename_column("label", "labels")
)
tokenized_datasets.set_format("torch")

# Train/test splits and their batch iterators.
train_dataset = tokenized_datasets["train"]
test_dataset = tokenized_datasets["test"]
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=16)
# Load pretrained BERT with a 2-way classification head.
# id2label/label2id follow the IMDb convention (0 = negative, 1 = positive) so
# the deployed pipeline reports human-readable labels instead of the default
# LABEL_0 / LABEL_1 placeholders.
model = BertForSequenceClassification.from_pretrained(
    "bert-base-uncased",
    num_labels=2,
    id2label={0: "NEGATIVE", 1: "POSITIVE"},
    label2id={"NEGATIVE": 0, "POSITIVE": 1},
)

# `transformers.AdamW` was deprecated and has been removed from the
# transformers package (importing it raises ImportError on current releases);
# torch.optim.AdamW is the drop-in replacement.
optimizer = torch.optim.AdamW(model.parameters(), lr=5e-5)

# Linear learning-rate decay over the whole run (3 epochs, matching train_model()).
num_training_steps = len(train_loader) * 3  # 3 epochs
lr_scheduler = get_scheduler(
    "linear",
    optimizer=optimizer,
    num_warmup_steps=0,
    num_training_steps=num_training_steps,
)

# Move Model to GPU if Available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
# Training Loop
def train_model():
    """Fine-tune the classifier for 3 epochs over the IMDb training split."""
    model.train()
    num_epochs = 3
    for epoch in range(num_epochs):
        print(f"Epoch {epoch+1}")
        for batch in train_loader:
            # Ship every tensor in the batch to the training device.
            inputs = {name: tensor.to(device) for name, tensor in batch.items()}
            loss = model(**inputs).loss
            # Backpropagate, step the optimizer + LR schedule, then clear grads.
            loss.backward()
            optimizer.step()
            lr_scheduler.step()
            optimizer.zero_grad()
            print(f"Loss: {loss.item()}")
# Evaluation Function
def evaluate_model():
    """Print accuracy and a per-class report over the IMDb test split."""
    model.eval()
    all_preds = []
    all_labels = []
    with torch.no_grad():
        for batch in test_loader:
            batch = {name: tensor.to(device) for name, tensor in batch.items()}
            logits = model(**batch).logits
            all_preds.extend(logits.argmax(dim=1).cpu().numpy())
            all_labels.extend(batch["labels"].cpu().numpy())
    accuracy = accuracy_score(all_labels, all_preds)
    print("Accuracy:", accuracy)
    print("Classification Report:\n", classification_report(all_labels, all_preds))
# Run fine-tuning, then measure held-out performance.
train_model()
evaluate_model()

# Persist the model weights and tokenizer files side by side so the
# deployment pipeline below can reload both from one directory.
for artifact in (model, tokenizer):
    artifact.save_pretrained("sentiment_model")
# Rebuild an inference pipeline from the saved checkpoint for deployment.
sentiment_pipeline = pipeline("sentiment-analysis", model="sentiment_model")
# Gradio Inference Function
def analyze_sentiment(review):
    """Return the predicted sentiment label for one review string."""
    predictions = sentiment_pipeline(review)
    top = predictions[0]
    return top['label']
# Gradio UI: one multi-line textbox in, the predicted label out.
review_input = gr.Textbox(lines=5, placeholder="Enter a movie review...")
iface = gr.Interface(
    fn=analyze_sentiment,
    inputs=review_input,
    outputs="text",
    title="IMDb Sentiment Analysis",
)

# Launch the Gradio App
iface.launch()