César Trujillo committed on
Commit
b880319
·
verified ·
1 Parent(s): 964a387

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +20 -3
README.md CHANGED
@@ -1,3 +1,20 @@
1
- ---
2
- license: unknown
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: unknown
3
+ ---
4
+ pip install transformers
5
+
6
+ from transformers import GPT2Tokenizer
7
+
8
+ # Initialize the GPT-2 tokenizer
9
+ tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
10
+
11
+ # Text to tokenize
12
+ text = "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua."
13
+
14
+ # Tokenizing the text
15
+ tokens = tokenizer.tokenize(text)
16
+ token_ids = tokenizer.convert_tokens_to_ids(tokens)
17
+
18
+ # Print tokens and token IDs
19
+ print(tokens)
20
+ print(token_ids)