"""ClinFly — utilities/get_model.py: download and cache the NLP models.

Author: GERNET Enody (uploaded in commit ccc4c35, "Add files via upload").
"""
import stanza
import nltk
import os
import spacy
import streamlit as st
from .web_utilities import st_cache_resource_if, supported_cache
from .translate import Translator
@st_cache_resource_if(supported_cache, max_entries=5, ttl=3600)
def get_models(langue, output=os.path.expanduser("~")):
    """Download (if needed) every NLP resource used by the app.

    Fetches, for the source language ``langue``:
      * the Stanza tokenization models (into ``<output>/stanza_resources``),
      * the Marian translation model via the project ``Translator`` wrapper
        (constructed for its download side effect only),
      * the NLTK ``omw-1.4`` and ``wordnet`` corpora (into ``<output>/nltk_data``),
      * the spaCy ``en_core_web_lg`` model, persisted to ``<output>/en_core_web_lg``.

    Parameters
    ----------
    langue : str
        Source language code (e.g. ``"fr"``, ``"de"``).
    output : str
        Directory under which all model data is stored; defaults to the
        user's home directory.

    Returns
    -------
    None — this function exists purely for its download/caching side effects.
    """
    # The original if/elif/else on langue had three byte-identical branches;
    # the language-independent single path below is equivalent for all inputs.
    stanza.download(langue, dir=os.path.join(output, "stanza_resources"))
    Translator(langue, "en")

    nltk_dir = os.path.join(output, "nltk_data")
    if nltk_dir not in nltk.data.path:
        nltk.data.path.append(nltk_dir)
    # nltk.data.find() expects category-qualified names ("corpora/<name>");
    # the bare names used previously always raised LookupError, forcing a
    # re-download on every call.
    for corpus in ("omw-1.4", "wordnet"):
        try:
            nltk.data.find("corpora/" + corpus)
        except LookupError:
            nltk.download(corpus, download_dir=nltk_dir)

    spacy_model_name = "en_core_web_lg"
    spacy_model_path = os.path.join(output, spacy_model_name)
    try:
        nlp = spacy.load(spacy_model_path)
        print(spacy_model_name + " already downloaded")
    except OSError:
        # Not on disk yet: fetch it, then persist so later runs hit the
        # spacy.load(spacy_model_path) fast path above.
        spacy.cli.download(spacy_model_name)
        nlp = spacy.load(spacy_model_name)
        nlp.to_disk(spacy_model_path)
@st_cache_resource_if(supported_cache, max_entries=5, ttl=3600)
def get_nlp_marian(source_lang):
    """Build and cache the NLP objects for ``source_lang``.

    Parameters
    ----------
    source_lang : str
        Language code of the input text.

    Returns
    -------
    tuple
        ``(pipeline, translator)`` where ``pipeline`` is a Stanza pipeline
        running only the ``tokenize`` processor, and ``translator`` is the
        project ``Translator`` from ``source_lang`` into English.
    """
    tokenize_pipeline = stanza.Pipeline(source_lang, processors="tokenize")
    to_english = Translator(source_lang, "en")
    return tokenize_pipeline, to_english