#!/bin/bash
#
# Set environment variables for the training and testing of stanza modules.
# Set UDBASE to the location of UD data folder
# The data should be CoNLL-U format
# For details, see
# http://universaldependencies.org/conll18/data.html (CoNLL-18 UD data)
# https://universaldependencies.org/
# To rebuild models from Universal Dependencies, download the UD data
# to a directory of your choice and point UDBASE at it on the line
# below. Alternatively, define UDBASE in your shell config, Windows
# environment variables, etc. as relevant.
UDBASE="/mimer/NOBACKUP/groups/dionysus/cleland/stanza-digphil/ud"
export UDBASE
# Set NERBASE to the location of NER data folder
# The data should be BIO format or convertible to that format
# For details, see https://www.aclweb.org/anthology/W03-0419.pdf (CoNLL-03 NER paper)
# There are other NER datasets, supported in
# stanza/utils/datasets/ner/prepare_ner_dataset.py
# If rebuilding NER data, choose a location for the NER directory
# and set NERBASE to that variable.
# export NERBASE=/path/to/NER
# Set CONSTITUENCY_BASE to the location of the constituency data folder
# The data will be in some dataset-specific format
# There is a conversion script which will turn this
# into a PTB style format
# stanza/utils/datasets/constituency/prepare_con_dataset.py
# If processing constituency data, choose a location for the CON data
# and set CONSTITUENCY_BASE to that variable.
# export CONSTITUENCY_BASE=/path/to/CON
# Directories that hold processed training/evaluation files.
# DATA_ROOT is the default home for all output of the preparation
# scripts; the training scripts then look for the stanza formatted
# data there. Each module gets its own subdirectory of DATA_ROOT.
DATA_ROOT="/mimer/NOBACKUP/groups/dionysus/cleland/stanza-digphil/data"
TOKENIZE_DATA_DIR="${DATA_ROOT}/tokenize"
MWT_DATA_DIR="${DATA_ROOT}/mwt"
LEMMA_DATA_DIR="${DATA_ROOT}/lemma"
POS_DATA_DIR="${DATA_ROOT}/pos"
DEPPARSE_DATA_DIR="${DATA_ROOT}/depparse"
ETE_DATA_DIR="${DATA_ROOT}/ete"
NER_DATA_DIR="${DATA_ROOT}/ner"
CHARLM_DATA_DIR="${DATA_ROOT}/charlm"
CONSTITUENCY_DATA_DIR="${DATA_ROOT}/constituency"
SENTIMENT_DATA_DIR="${DATA_ROOT}/sentiment"
# Mark every variable above for export so child processes inherit them.
export DATA_ROOT \
  TOKENIZE_DATA_DIR MWT_DATA_DIR LEMMA_DATA_DIR POS_DATA_DIR \
  DEPPARSE_DATA_DIR ETE_DATA_DIR NER_DATA_DIR CHARLM_DATA_DIR \
  CONSTITUENCY_DATA_DIR SENTIMENT_DATA_DIR
# Set the directory that stores external word vector data.
# The value is exported so that child processes inherit it.
export WORDVEC_DIR="/cephyr/users/cleland/Alvis/stanza_resources/sv/pretrain"