#
# Set environment variables for the training and testing of stanza modules.
#
# This file only defines and exports variables; it runs no commands.
# NOTE(review): presumably intended to be sourced (". config.sh") so the
# exports land in the calling shell - confirm against the calling scripts.

# Set UDBASE to the location of UD data folder
# The data should be CoNLL-U format
# For details, see
#   http://universaldependencies.org/conll18/data.html (CoNLL-18 UD data)
#   https://universaldependencies.org/
# When rebuilding models based on Universal Dependencies, download the
# UD data to some directory and set UDBASE below to that directory.
# Alternatively, put UDBASE in your shell config, Windows env
# variables, etc as relevant (and comment out the line below).
# NOTE(review): the path below is an absolute, site-specific location;
# adjust it for your own environment.
export UDBASE="/mimer/NOBACKUP/groups/dionysus/cleland/stanza-digphil/ud"

# Set NERBASE to the location of NER data folder
# The data should be BIO format or convertible to that format
# For details, see https://www.aclweb.org/anthology/W03-0419.pdf (CoNLL-03 NER paper)
# There are other NER datasets, supported in
#   stanza/utils/datasets/ner/prepare_ner_dataset.py
# If rebuilding NER data, choose a location for the NER directory
# and set NERBASE to that variable.
# export NERBASE=/path/to/NER

# Set CONSTITUENCY_BASE to the location of the constituency data folder
# The data will be in some dataset-specific format
# There is a conversion script which will turn this
# into a PTB style format
#   stanza/utils/datasets/constituency/prepare_con_dataset.py
# If processing constituency data, choose a location for the CON data
# and set CONSTITUENCY_BASE to that variable.
# export CONSTITUENCY_BASE=/path/to/CON

# Set directories to store processed training/evaluation files
# $DATA_ROOT is a default home for where all the outputs from the
# preparation scripts will go.  The training scripts will then look
# for the stanza formatted data in that directory.
# Every per-task directory below is derived from DATA_ROOT, so DATA_ROOT
# must be assigned first.  The expansions are double-quoted so the values
# survive intact even in shells that word-split arguments to "export".
export DATA_ROOT="/mimer/NOBACKUP/groups/dionysus/cleland/stanza-digphil/data"
export TOKENIZE_DATA_DIR="${DATA_ROOT}/tokenize"
export MWT_DATA_DIR="${DATA_ROOT}/mwt"
export LEMMA_DATA_DIR="${DATA_ROOT}/lemma"
export POS_DATA_DIR="${DATA_ROOT}/pos"
export DEPPARSE_DATA_DIR="${DATA_ROOT}/depparse"
export ETE_DATA_DIR="${DATA_ROOT}/ete"
export NER_DATA_DIR="${DATA_ROOT}/ner"
export CHARLM_DATA_DIR="${DATA_ROOT}/charlm"
export CONSTITUENCY_DATA_DIR="${DATA_ROOT}/constituency"
export SENTIMENT_DATA_DIR="${DATA_ROOT}/sentiment"

# Set directories to store external word vector data
export WORDVEC_DIR="/cephyr/users/cleland/Alvis/stanza_resources/sv/pretrain"