File size: 416 Bytes
a80f6e6
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
import tiktoken
from unstructured.nlp.tokenize import download_nltk_packages


def download_tiktoken_data():
    """Pre-fetch tiktoken encoding data for a fixed set of model names.

    Each model name is resolved through ``tiktoken.encoding_for_model``,
    which triggers the download and caching of the files that encoding
    needs. The returned encoding objects are discarded; the call is made
    purely for that caching side effect.
    """
    for model_name in ("gpt2", "gpt-3.5-turbo", "gpt-4o-mini"):
        tiktoken.encoding_for_model(model_name)


if __name__ == "__main__":
    # Script entry point: run each download step in order, tiktoken data
    # first and then the NLTK packages. An exception in a step stops the run
    # before any later step executes.
    for fetch_step in (download_tiktoken_data, download_nltk_packages):
        fetch_step()