# Record how long each entry of data.Text is once encoded by bert_tokenizer.
# NOTE(review): the length is whatever `encode` returns for the text; whether
# that count includes special tokens depends on the tokenizer - confirm.
token_lens = [len(bert_tokenizer.encode(txt)) for txt in data.Text]

# Plot the distribution of encoded lengths as a density histogram with a KDE
# curve on top; linewidth=0 draws the bars without an outline.
histogram_options = {'kde': True, 'stat': 'density', 'linewidth': 0}
sns.histplot(token_lens, **histogram_options)

# Restrict the x-axis to the 0-100 range and label it.
# NOTE(review): the trailing semicolons presumably silence the value echo in
# a notebook cell - confirm before removing them.
plt.xlim(0, 100);
plt.xlabel('Token count');
# Record how long each entry of data.Text is once encoded by bert_tokenizer.
# NOTE(review): the length is whatever `encode` returns for the text; whether
# that count includes special tokens depends on the tokenizer - confirm.
token_lens = [len(bert_tokenizer.encode(txt)) for txt in data.Text]

# Plot the distribution of encoded lengths as a density histogram with a KDE
# curve on top; linewidth=0 draws the bars without an outline.
histogram_options = {'kde': True, 'stat': 'density', 'linewidth': 0}
sns.histplot(token_lens, **histogram_options)

# Restrict the x-axis to the 0-100 range and label it.
# NOTE(review): the trailing semicolons presumably silence the value echo in
# a notebook cell - confirm before removing them.
plt.xlim(0, 100);
plt.xlabel('Token count');