#!/usr/bin/env bash
set -euo pipefail

# ================================
# USAGE:
#   ./make_new_model.sh diachronic.pt sv diachron ...
#
# Arguments:
#   $1    pretrained word-vector file name; it is appended to the
#         stanza pretrain directory used in steps 3 and 4 and is also
#         used to name the log file
#   $2..  one or more language codes, forwarded unchanged to
#         prepare-train-val-test.py
#
# logs/, prepare-train-val-test.py and scripts/config_alvis.sh are all
# referenced relative to the current working directory.
# Exits with status 1 when fewer than two arguments are given.
# ================================

if (( $# < 2 )); then
  echo "Usage: $0 ..." >&2
  exit 1
fi

PRETRAINED_MODEL="$1"
shift
LANGCODES=("$@")

# ========================================
# 0. SET UP LOGGING
# ========================================

mkdir -p logs
timestamp=$(date +"%Y%m%d_%H%M%S")

# Join language codes: sv_nn_da
LANG_JOINED=$(IFS=_; printf '%s' "${LANGCODES[*]}")

# Build log filename. Only the last path component of the model argument
# is used, so an argument containing '/' cannot send the log file into a
# missing subdirectory or outside logs/.
model_tag="${PRETRAINED_MODEL##*/}"
LOGFILE="logs/log_${model_tag}_${LANG_JOINED}_${timestamp}.txt"

# From here on, stdout and stderr are both shown on the terminal and
# appended to the log file via tee.
exec > >(tee -a "$LOGFILE") 2>&1

echo "=== LOGFILE: $LOGFILE ==="
echo "Language codes: ${LANGCODES[*]}"
echo "Using pretrained model: $PRETRAINED_MODEL"
echo

# ========================================
# 1. PREPARE TRAIN/VAL/TEST SPLITS
# ========================================

echo "Running: python prepare-train-val-test.py ${LANGCODES[*]}"
python prepare-train-val-test.py "${LANGCODES[@]}"

# ========================================
# 2. LOAD ALVIS CONFIG
# ========================================

echo "Sourcing scripts/config_alvis.sh"
# shellcheck source=/dev/null
source scripts/config_alvis.sh

# Shared by steps 3 and 4. Defined after the config has been sourced so
# that the sourced file cannot overwrite them.
depparse_treebank="UD_Swedish-diachronic"
wordvec_pretrain_file="/cephyr/users/cleland/Alvis/stanza_resources/sv/pretrain/${PRETRAINED_MODEL}"

# ========================================
# 3. PREPARE STANZA DATASET
# ========================================

echo "Running stanza dataset preparation…"
python -m stanza.utils.datasets.prepare_depparse_treebank "$depparse_treebank" \
  --wordvec_pretrain_file "$wordvec_pretrain_file"

# ========================================
# 4. TRAIN THE DEPENDENCY PARSER
# ========================================

echo "Running stanza dependency parser training…"
python -m stanza.utils.training.run_depparse "$depparse_treebank" \
  --wordvec_pretrain_file "$wordvec_pretrain_file" \
  --batch_size 32 \
  --dropout 0.33

echo "DONE."
echo "Full log saved to: $LOGFILE"

# ========================================
# 5. UPDATE 'latest.txt' SYMLINK
# ========================================

# The link target is the bare file name (no logs/ prefix) because the
# symlink itself lives inside logs/.
ln -sf "${LOGFILE##*/}" logs/latest.txt
echo "Symlink updated: logs/latest.txt → ${LOGFILE##*/}"