import stanza import nltk import os import spacy import streamlit as st from .web_utilities import st_cache_resource_if, supported_cache from .translate import Translator @st_cache_resource_if(supported_cache, max_entries=5, ttl=3600) def get_models(langue,output=os.path.expanduser("~")): if langue == "fr": stanza.download(langue,dir = os.path.join(output,"stanza_resources")) Translator(langue, "en") elif langue == "de": stanza.download(langue,dir = os.path.join(output,"stanza_resources")) Translator(langue, "en") else: stanza.download(langue,dir = os.path.join(output,"stanza_resources")) Translator(langue, "en") if os.path.join(output,"nltk_data") not in nltk.data.path: nltk.data.path.append(os.path.join(output,"nltk_data")) try: nltk.data.find("omw-1.4") except LookupError: nltk.download("omw-1.4",download_dir = os.path.join(output,"nltk_data")) try: nltk.data.find("wordnet") except LookupError: nltk.download("wordnet", download_dir = os.path.join(output,"nltk_data")) spacy_model_name = "en_core_web_lg" try: nlp = spacy.load(os.path.join(output,spacy_model_name)) print(spacy_model_name + " already downloaded") except OSError: spacy.cli.download(spacy_model_name) nlp = spacy.load(spacy_model_name) nlp.to_disk(os.path.join(output,spacy_model_name)) @st_cache_resource_if(supported_cache, max_entries=5, ttl=3600) def get_nlp_marian(source_lang): nlp_fr = stanza.Pipeline(source_lang, processors="tokenize") marian_fr_en = Translator(source_lang, "en") return nlp_fr, marian_fr_en