stanza-digphil / make_new_model.sh
Albin Thörn Cleland
new models
b01c18c
#!/usr/bin/env bash
# Validates the command line and captures the pretrained model name and the
# language codes that the rest of the script works with.
set -euo pipefail
# ================================
# USAGE:
# ./make_new_model.sh <pretrained_model> <lang1> [<lang2> ...]
# e.g. ./make_new_model.sh diachronic.pt sv diachron
#
# <pretrained_model>  first argument; stored in PRETRAINED_MODEL
# <lang...>           one or more language codes; stored in the LANGCODES array
# ================================
if (( $# < 2 )); then
  # The model plus at least one language code are required. A usage error is
  # a diagnostic, so it is written to stderr rather than stdout.
  echo "Usage: $0 <pretrained_model> <lang1> [<lang2> ...]" >&2
  exit 1
fi
PRETRAINED_MODEL="$1"
shift # drop the model so that "$@" holds only the language codes
LANGCODES=("$@")
# ========================================
# 0. SET UP LOGGING
# ========================================
# From this section on, everything the script prints (stdout and stderr) is
# shown on the terminal and appended to a per-run log file under logs/.

#######################################
# Build the log file path for one run.
# Arguments: $1 - pretrained model (file name or path)
#            $2 - language codes already joined with "_"
#            $3 - timestamp
# Outputs:   logs/log_<model>_<langs>_<timestamp>.txt on stdout
#######################################
build_log_path() {
  local model=$1 langs=$2 stamp=$3
  # A "/" in either part would place the log in a sub-directory that
  # "mkdir -p logs" never creates, and tee could not open it. Keep only the
  # model's file name and flatten slashes in the language part.
  model=${model##*/}
  langs=${langs//\//_}
  printf 'logs/log_%s_%s_%s.txt\n' "$model" "$langs" "$stamp"
}

mkdir -p logs
timestamp=$(date +"%Y%m%d_%H%M%S")
# Join language codes: sv_nn_da
LANG_JOINED=$(printf "_%s" "${LANGCODES[@]}")
LANG_JOINED="${LANG_JOINED:1}" # drop the leading "_" that printf emitted
# Build log filename
LOGFILE=$(build_log_path "$PRETRAINED_MODEL" "$LANG_JOINED" "$timestamp")
# Route stdout and stderr through tee so both reach the terminal and are
# appended to the log file.
exec > >(tee -a "$LOGFILE") 2>&1
echo "=== LOGFILE: $LOGFILE ==="
echo "Language codes: ${LANGCODES[*]}"
echo "Using pretrained model: $PRETRAINED_MODEL"
echo
# ========================================
# 1. PREPARE TRAIN/VAL/TEST SPLITS
# ========================================
# Announce the command (codes space-joined for display), then run the split
# script with each language code passed as its own argument.
printf 'Running: python prepare-train-val-test.py %s\n' "${LANGCODES[*]}"
python prepare-train-val-test.py "${LANGCODES[@]}"
# ========================================
# 2. LOAD ALVIS CONFIG
# ========================================
# The config is sourced into this shell, so whatever it defines or exports
# stays in effect for the steps that follow.
# NOTE(review): presumably sets up the Alvis cluster environment; the
# contents of scripts/config_alvis.sh are not visible here.
printf '%s\n' "Sourcing scripts/config_alvis.sh"
. scripts/config_alvis.sh
# ========================================
# 3. PREPARE STANZA DATASET
# ========================================
# Steps 3 and 4 must use the same treebank and the same pretrain file, so
# both are defined once here. The defaults are the values this script has
# always used; set STANZA_TREEBANK or STANZA_PRETRAIN_DIR in the environment
# to override them.
STANZA_TREEBANK="${STANZA_TREEBANK:-UD_Swedish-diachronic}"
STANZA_PRETRAIN_DIR="${STANZA_PRETRAIN_DIR:-/cephyr/users/cleland/Alvis/stanza_resources/sv/pretrain}"
# PRETRAINED_MODEL is appended to the pretrain directory as given.
WORDVEC_PRETRAIN_FILE="${STANZA_PRETRAIN_DIR}/${PRETRAINED_MODEL}"
echo "Running stanza dataset preparation…"
python -m stanza.utils.datasets.prepare_depparse_treebank "$STANZA_TREEBANK" \
  --wordvec_pretrain_file "$WORDVEC_PRETRAIN_FILE"
# ========================================
# 4. TRAIN THE DEPENDENCY PARSER
# ========================================
echo "Running stanza dependency parser training…"
python -m stanza.utils.training.run_depparse "$STANZA_TREEBANK" \
  --wordvec_pretrain_file "$WORDVEC_PRETRAIN_FILE" \
  --batch_size 32 \
  --dropout 0.33
# With "set -e" active, these lines are reached only if both stanza commands
# succeeded.
echo "DONE."
echo "Full log saved to: $LOGFILE"
# ========================================
# 5. UPDATE 'latest.txt' SYMLINK
# ========================================
# The link is created inside logs/, so its target is the bare log file name,
# which resolves relative to the link's own directory. -f replaces the link
# left by an earlier run.
latest_target="${LOGFILE##*/}"
ln -sf "$latest_target" logs/latest.txt
echo "Symlink updated: logs/latest.txt → $latest_target"