#!/usr/bin/env bash
#
# Prepare data and train a Stanza dependency parser for the
# UD_Swedish-diachronic treebank.
#
# Usage:
#   <this script> <pretrained_model> <lang1> [<lang2> ...]
#
# Arguments:
#   <pretrained_model>  File name of the word-vector pretrain; it is appended
#                       to the hard-coded pretrain directory used below and
#                       passed to Stanza as --wordvec_pretrain_file.
#   <langN>             One or more language codes, forwarded unchanged to
#                       prepare-train-val-test.py.
#
# Steps:
#   1. python prepare-train-val-test.py <lang codes>
#   2. source scripts/config_alvis.sh
#   3. python -m stanza.utils.datasets.prepare_depparse_treebank
#   4. python -m stanza.utils.training.run_depparse
#
# Output:
#   stdout and stderr of everything after argument parsing are shown on the
#   terminal and appended to logs/log_<model>_<langs>_<timestamp>.txt.
#   When all steps succeed, logs/latest.txt is re-pointed at that log file.
#
# All paths (logs/, scripts/, prepare-train-val-test.py) are relative, so the
# script has to be started from the directory that contains them.

set -euo pipefail

# --- Argument handling -------------------------------------------------------

# Need the model name plus at least one language code.
if (( $# < 2 )); then
  printf 'Usage: %s <pretrained_model> <lang1> <lang2> ...\n' "$0" >&2
  exit 1
fi

PRETRAINED_MODEL="$1"
shift
LANGCODES=("$@")

# --- Logging setup -----------------------------------------------------------

mkdir -p logs

timestamp=$(date +"%Y%m%d_%H%M%S")

# Join the language codes with "_" (e.g. "sv" "da" -> "sv_da"). IFS is changed
# only inside the command-substitution subshell, so it does not leak.
LANG_JOINED=$(IFS=_; printf '%s' "${LANGCODES[*]}")

# A "/" in the model name or a language code would make the log path point
# into a non-existent sub-directory of logs/, so flatten it for the file name.
log_tag="${PRETRAINED_MODEL}_${LANG_JOINED}"
log_tag="${log_tag//\//_}"
LOGFILE="logs/log_${log_tag}_${timestamp}.txt"

# From here on, send stdout and stderr both to the terminal and to the log
# file. The tee process runs asynchronously via process substitution.
exec > >(tee -a "$LOGFILE") 2>&1

echo "=== LOGFILE: $LOGFILE ==="
echo "Language codes: ${LANGCODES[*]}"
echo "Using pretrained model: $PRETRAINED_MODEL"
echo

# --- Step 1: build the train/val/test splits ---------------------------------

echo "Running: python prepare-train-val-test.py ${LANGCODES[*]}"
python prepare-train-val-test.py "${LANGCODES[@]}"

# --- Step 2: load the environment configuration ------------------------------

echo "Sourcing scripts/config_alvis.sh"
# Sourced at top level (not inside a function) so that anything the config
# declares keeps global scope.
# shellcheck source=/dev/null
source scripts/config_alvis.sh

# Shared by both Stanza invocations below. Assigned after the config has been
# sourced so the config cannot be affected by these helper variables.
depparse_treebank="UD_Swedish-diachronic"
wordvec_pretrain_file="/cephyr/users/cleland/Alvis/stanza_resources/sv/pretrain/${PRETRAINED_MODEL}"

# --- Step 3: prepare the treebank for the dependency parser ------------------

echo "Running stanza dataset preparation…"
python -m stanza.utils.datasets.prepare_depparse_treebank "$depparse_treebank" \
  --wordvec_pretrain_file "$wordvec_pretrain_file"

# --- Step 4: train the dependency parser -------------------------------------

echo "Running stanza dependency parser training…"
python -m stanza.utils.training.run_depparse "$depparse_treebank" \
  --wordvec_pretrain_file "$wordvec_pretrain_file" \
  --batch_size 32 \
  --dropout 0.33

echo "DONE."
echo "Full log saved to: $LOGFILE"

# --- Convenience symlink -----------------------------------------------------

# The link target is the bare file name (no "logs/" prefix) because the
# symlink itself lives inside logs/ and is resolved relative to that directory.
log_name="${LOGFILE##*/}"
ln -sf "$log_name" logs/latest.txt
echo "Symlink updated: logs/latest.txt → $log_name"