File size: 272 Bytes
ae35270
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
from transformers import BertTokenizer

# Name of the pretrained checkpoint handed to `BertTokenizer.from_pretrained`.
PRE_TRAINED_MODEL = 'bert-base-uncased'

# Tokenizer instance built from the checkpoint named above.
bert_tokenizer = BertTokenizer.from_pretrained(PRE_TRAINED_MODEL)

# Index of the sample row to inspect.
id_data = 3

# NOTE(review): the sentence shown below is read from `raw_data`, while the
# text that gets tokenized is read from `data`. Neither name is defined in
# this snippet -- confirm both exist upstream and that using two different
# sources here is intentional.

# Show the sample text ("Kalimat" is the label used for the sentence).
print(
    'Kalimat:',
    raw_data.Text[id_data],
)

# Show the result of `bert_tokenizer.tokenize` for the same row index.
print(
    'BERT Tokenizer:',
    bert_tokenizer.tokenize(data.Text[id_data]),
)