# Albin Thörn Cleland
# Clean initial commit with LFS
# 19b8775
import glob
import logging
import os
import shutil
import stanza
from stanza.resources import installation
from stanza.tests import TEST_HOME_VAR, TEST_DIR_BASE_NAME
# Logger named 'stanza'; the progress messages below are emitted through it.
logger = logging.getLogger('stanza')

# Resolve the test home directory: the environment variable named by
# TEST_HOME_VAR is used when it holds a non-empty value; otherwise fall back
# to TEST_DIR_BASE_NAME under the current working directory and say so.
test_dir = os.environ.get(TEST_HOME_VAR)
if not test_dir:
    test_dir = os.path.join(os.getcwd(), TEST_DIR_BASE_NAME)
    logger.info("STANZA_TEST_HOME not set. Will assume $PWD/stanza_test = %s", test_dir)
    logger.info("To use a different directory, export or set STANZA_TEST_HOME=...")
# Sub-directories of the test home, one per leaf name (same order as the names).
in_dir, out_dir, scripts_dir, models_dir, corenlp_dir = (
    os.path.join(test_dir, leaf)
    for leaf in ("in", "out", "scripts", "models", "corenlp_dir")
)

# Create the test home first, then each sub-directory.
# exist_ok=True means directories that already exist are not an error.
for _needed_dir in (test_dir, in_dir, out_dir, scripts_dir, models_dir, corenlp_dir):
    os.makedirs(_needed_dir, exist_ok=True)
logger.info("COPYING FILES")

# Fixed fixtures as (source file, destination directory) pairs.
# The source paths are relative, so they resolve against the current
# working directory of the process running this script.
_FIXTURE_COPIES = (
    ("stanza/tests/data/external_server.properties", scripts_dir),
    ("stanza/tests/data/example_french.json", out_dir),
    ("stanza/tests/data/aws_annotations.zip", in_dir),
)
for _fixture_src, _fixture_dst in _FIXTURE_COPIES:
    shutil.copy(_fixture_src, _fixture_dst)

# Every file matching tiny_emb.* is copied into the input directory.
for _emb_path in glob.glob("stanza/tests/data/tiny_emb.*"):
    shutil.copy(_emb_path, in_dir)
# Download every stanza model requested below into models_dir.
logger.info("DOWNLOADING MODELS")
stanza.download(lang='en', model_dir=models_dir, logging_level='info')
# English again: no package, only the ncbi_disease NER model is requested
stanza.download(lang="en", model_dir=models_dir, package=None, processors={"ner":"ncbi_disease"})
stanza.download(lang='fr', model_dir=models_dir, logging_level='info')
# Hebrew: only the tokenize processor is requested
stanza.download(lang='he', model_dir=models_dir, processors='tokenize', logging_level='info')
# Latin ITTB has no case information for the lemmatizer
stanza.download(lang='la', model_dir=models_dir, package='ittb', logging_level='info')
stanza.download(lang='zh', model_dir=models_dir, logging_level='info')
# useful not just for verifying RtL, but because the default Arabic has a unique style of xpos tags
stanza.download(lang='ar', model_dir=models_dir, logging_level='info')
stanza.download(lang='multilingual', model_dir=models_dir, logging_level='info')
logger.info("DOWNLOADING CORENLP")
# Install CoreNLP itself into corenlp_dir.
installation.install_corenlp(dir=corenlp_dir)

# Then fetch the additional language models, in this order, each from the
# "main" version and into the same directory.
for _corenlp_language in ("french", "german", "italian", "spanish"):
    installation.download_corenlp_models(model=_corenlp_language, version="main", dir=corenlp_dir)

logger.info("Test setup completed.")