GenSeg-Baselines / code /scripts /h800_parallel_extract.sh
MaybeRichard's picture
code: complete eval pipeline (7 metrics + per-class + Wilcoxon) + Swin-UNet/TransUNet networks; remove backups/obsolete
1a18f22 verified
Raw
History Blame Contribute Delete
1.2 kB
#!/usr/bin/env bash
# Parallel extraction of the remaining GenSegDataset tars on h800's slow network share.
# Big archives are split by member-list into N chunks, each extracted by a separate
# `tar -x -T <chunk>` process, to saturate the share's parallel small-file throughput.
# -u: expanding an unset variable is an error, so a typo in a path variable
# cannot silently turn into an empty string.
set -u
# Dataset root on the network share; archives are unpacked directly into it.
BASE=/mnt/tidal-alsh-share2/dataset/qinshengqian/research/c3/NPJ-ACM/Data
# Directory holding the <dataset>.tar archives.
TARS=$BASE/_tars
# Scratch directory for member lists and per-chunk name files.
WORK=/tmp/pextract
# Abort early if the scratch directory cannot be created: without it no chunk
# list can be written, yet the script would still go on to delete the archives.
mkdir -p "$WORK" || { echo "cannot create $WORK" >&2; exit 1; }
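# launch_ds <dataset> <chunks>: the per-dataset parallel chunk counts are set at
# the call sites below (kits19 has the most files, so it gets the most chunks).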
#######################################
# Start background extraction of "$TARS/<dataset>.tar" into "$BASE".
# Globals:   TARS, BASE, WORK (read)
# Arguments: $1 - dataset name (archive basename without ".tar")
#            $2 - number of parallel tar processes; <= 1 runs one plain `tar -x`
# Outputs:   "MISSING <path>" on stdout when the archive does not exist;
#            "LIST_FAILED <path>" on stderr when the archive cannot be listed
# Returns:   0 once the background jobs are started (or the archive is absent),
#            1 if the member list could not be built or split.
# The tar processes run in the background; the caller must `wait` for them.
# In chunked mode only non-directory members are named, so directory entries
# themselves (and therefore empty directories) are not extracted.
#######################################
launch_ds() {
  local ds=$1 n=$2 tar="$TARS/$1.tar"
  local list="$WORK/$ds.list" prefix="$WORK/$ds.chunk." c

  [ -f "$tar" ] || { echo "MISSING $tar"; return 0; }

  if [ "$n" -le 1 ]; then
    tar -xf "$tar" -C "$BASE" &
    return 0
  fi

  # Remove chunk files left by an earlier run: with a smaller n, split would
  # overwrite only the first n of them and the glob below would pick up the rest.
  rm -f -- "$prefix"*

  # List into a file instead of a pipeline so tar's own exit status is visible;
  # a truncated or corrupt archive must not be mistaken for an empty one.
  if ! tar -tf "$tar" > "$list.raw"; then
    echo "LIST_FAILED $tar" >&2
    return 1
  fi
  # Keep only non-directory members. grep exits 1 when nothing is selected,
  # which is legitimate here; any other non-zero status is a real error.
  grep -v '/$' "$list.raw" > "$list" || [ $? -eq 1 ] || return 1
  rm -f -- "$list.raw"

  # l/N splits on line boundaries into N files with numeric suffixes.
  split -n "l/$n" -d "$list" "$prefix" || return 1

  for c in "$prefix"*; do
    # An empty chunk (n larger than the member count) would cost a full read of
    # the archive for nothing, so no process is started for it.
    [ -s "$c" ] || continue
    tar -xf "$tar" -C "$BASE" -T "$c" &
  done
  return 0
}
# Launch all datasets, wait for every tar stream, and delete the archives and
# the scratch directory ONLY if no tar invocation failed. A bare `wait` always
# returns 0, so failures are recorded through a marker file instead.

# Marker file and member lists live in $WORK; refuse to run without it.
if [ ! -d "$WORK" ] || [ ! -w "$WORK" ]; then
  echo "scratch dir $WORK is missing or not writable" >&2
  exit 1
fi
fail_mark="$WORK/.tar_failed"
rm -f -- "$fail_mark"   # forget failures recorded by an earlier run

# Shadow `tar` with a function so every tar started by launch_ds (including
# the background ones) records a non-zero exit status in $fail_mark.
tar() {
  local rc=0
  command tar "$@" || rc=$?
  if [ "$rc" -ne 0 ]; then
    echo "TAR_FAILED rc=$rc: tar $*" >&2
    : > "$fail_mark"
  fi
  return "$rc"
}

# <dataset>:<parallel chunk count> (kits19 has the most files).
datasets=(medsegdb_kits19:8 pannuke_semantic:4 refuge2:1)
failed=0

echo "[start] $(date +%T) launching parallel extraction"
for spec in "${datasets[@]}"; do
  launch_ds "${spec%%:*}" "${spec##*:}" || failed=1
done
echo "launched $(jobs -p | wc -l) parallel tar streams"
wait
echo "PEXTRACT_DONE $(date +%T)"

if [ "$failed" -ne 0 ] || [ -e "$fail_mark" ]; then
  # Keep the archives and chunk lists so the extraction can be retried.
  echo "CLEANUP_SKIPPED $(date +%T) extraction errors; keeping $TARS and $WORK" >&2
  exit 1
fi

# cleanup tars + work: remove only the archives extracted above, so an archive
# this script never unpacked is not deleted along with them.
for spec in "${datasets[@]}"; do
  rm -f -- "${TARS:?}/${spec%%:*}.tar"
done
# Best effort: succeeds only when the directory is now empty.
rmdir "$TARS" 2>/dev/null || true
rm -rf -- "${WORK:?}"
echo "CLEANUP_DONE $(date +%T)"