File size: 221 Bytes
2e7eba3
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
# Length of the encoded sequence for every entry in data.Text.
# NOTE(review): whether the count includes special tokens depends on the
# defaults of bert_tokenizer.encode — confirm against the tokenizer in use.
token_lens = [len(bert_tokenizer.encode(txt)) for txt in data.Text]

# Density histogram (with a KDE overlay) of the per-text token counts.
sns.histplot(token_lens, kde=True, stat='density', linewidth=0)
# Only the 0-100 range of the x-axis is shown; longer texts fall outside it.
plt.xlim([0, 100])
# Trailing semicolon kept from the original; presumably this runs as a
# notebook cell, where it suppresses the echoed return value — confirm.
plt.xlabel('Token count');