# Source path: learn/langchain/docs/scripts/cache_data.py
# Uploaded by unfair11212 using huggingface_hub (commit a80f6e6, verified).
import tiktoken
from unstructured.nlp.tokenize import download_nltk_packages
def download_tiktoken_data():
    """Warm the local tiktoken cache for the models listed below.

    Looking up an encoding by model name triggers the download and caching
    of the files tiktoken needs; the returned encoding objects themselves
    are discarded.
    """
    model_names = ("gpt2", "gpt-3.5-turbo", "gpt-4o-mini")
    for model_name in model_names:
        # Only the download/caching side effect matters here.
        tiktoken.encoding_for_model(model_name)
if __name__ == "__main__":
    # Run each cache-priming step in turn: tiktoken data first, then the
    # NLTK packages used by `unstructured`.
    for prime_cache in (download_tiktoken_data, download_nltk_packages):
        prime_cache()