from datasets import load_dataset from transformers import AutoTokenizer, TFAutoModelForSequenceClassification import tensorflow as tf tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased") model = TFAutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased") dataset = load_dataset('imdb') train_data = dataset['train'] test_data = dataset['test'] tokenized_train = tokenizer(train_data['text'], truncation=True, padding=True, return_tensors="tf") tokenized_test = tokenizer(test_data['text'], truncation=True, padding=True, return_tensors="tf") outputs = model(**tokenized_test) logits = outputs.logits pred = tf.argmax(logits, axis=-1)