File size: 675 Bytes
27efcf2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 |
from datasets import load_dataset
from transformers import AutoTokenizer, TFAutoModelForSequenceClassification
import tensorflow as tf
# Predict binary sentiment labels for the IMDB test split with DistilBERT.
#
# NOTE(review): TFAutoModelForSequenceClassification attaches a randomly
# initialized classification head — no fine-tuning happens in this script,
# so `pred` is essentially random. Presumably a training step on the train
# split was intended; confirm before using these predictions.

tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
# num_labels=2: IMDB is a binary (negative=0 / positive=1) task.
model = TFAutoModelForSequenceClassification.from_pretrained(
    "distilbert-base-uncased", num_labels=2
)

dataset = load_dataset('imdb')
train_data = dataset['train']
test_data = dataset['test']

# Run inference in fixed-size batches. The original tokenized and forwarded
# all ~25k test reviews as ONE padded tensor (padded to the global max
# length), which is extremely memory-hungry; it also tokenized the entire
# train split into a tensor that was never used — that work is dropped here.
BATCH_SIZE = 32
texts = test_data['text']
batch_logits = []
for start in range(0, len(texts), BATCH_SIZE):
    # padding=True pads only to the longest sequence in this batch.
    encodings = tokenizer(
        texts[start:start + BATCH_SIZE],
        truncation=True,
        padding=True,
        return_tensors="tf",
    )
    batch_logits.append(model(**encodings).logits)

# (n_test, 2) logits over the whole test split, then argmax -> class ids.
logits = tf.concat(batch_logits, axis=0)
pred = tf.argmax(logits, axis=-1)