# metadata
# license: unknown
# Requires the Hugging Face Transformers package: pip install transformers
from transformers import GPT2Tokenizer
# Initialize the GPT-2 tokenizer from the pretrained "gpt2" checkpoint name.
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")

# Text to tokenize
text = "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua."

# Tokenize the text, then convert the resulting tokens to their IDs.
tokens = tokenizer.tokenize(text)
token_ids = tokenizer.convert_tokens_to_ids(tokens)

# Print tokens and token IDs
print(tokens)
print(token_ids)