File size: 675 Bytes
27efcf2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
from datasets import load_dataset
from transformers import AutoTokenizer, TFAutoModelForSequenceClassification
import tensorflow as tf

# Tokenizer and model must come from the same checkpoint so the vocabulary
# matches the embedding weights.
MODEL_NAME = "distilbert-base-uncased"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# NOTE(review): this base checkpoint has no fine-tuned classification head —
# the head is randomly initialized, so predictions below are not meaningful
# until the model is trained. Confirm whether a fine-tuned checkpoint
# (e.g. one trained on IMDB/SST-2) was intended here.
model = TFAutoModelForSequenceClassification.from_pretrained(MODEL_NAME)

dataset = load_dataset('imdb')
train_data = dataset['train']
test_data = dataset['test']

# Cap sequence length explicitly: with padding=True and no max_length, every
# example is padded to the longest review in the split, which inflates the
# tensors far beyond the model's 512-token limit anyway.
tokenized_train = tokenizer(
    train_data['text'],
    truncation=True,
    padding=True,
    max_length=512,
    return_tensors="tf",
)
tokenized_test = tokenizer(
    test_data['text'],
    truncation=True,
    padding=True,
    max_length=512,
    return_tensors="tf",
)

# Run inference in batches: a single forward pass over all 25,000 IMDB test
# reviews would exhaust memory. Each chunk's logits are collected and
# concatenated so downstream code sees the same (num_examples, num_labels)
# tensor the original one-shot call would have produced.
BATCH_SIZE = 32
num_examples = int(tokenized_test["input_ids"].shape[0])
logits_chunks = []
for start in range(0, num_examples, BATCH_SIZE):
    batch = {k: v[start:start + BATCH_SIZE] for k, v in tokenized_test.items()}
    logits_chunks.append(model(**batch).logits)

logits = tf.concat(logits_chunks, axis=0)
# Predicted class index per example (argmax over the label dimension).
pred = tf.argmax(logits, axis=-1)