#!/bin/bash
#
# Push the Arcspan R8 training inputs (patched vendor code, data files,
# scripts and label spaces) from LOCAL_DIR to the GPU host with rsync, then
# verify the result on the remote side and reinstall the vendored `opf`
# package in editable mode.
#
# Usage: <this script> [--dry-run]
#   --dry-run   pass --dry-run to rsync (nothing is transferred) and skip
#               the post-sync verification / reinstall step.
#
# Exit status: 0 on success, 2 on bad arguments, otherwise the status of the
# first failing rsync, or of the remote verification.

set -euo pipefail

readonly GPU="root@199.126.203.145"
readonly PORT=18732
# Remote shell used by rsync (-e takes a single command string).
readonly RSYNC_SSH="ssh -p $PORT"
# The tilde is intentionally quoted: it must reach the remote side unexpanded.
# shellcheck disable=SC2088
readonly REMOTE_DIR="~/alkyline"
readonly LOCAL_DIR="/home/ubuntu/alkyline"

# Command prefixes are argv arrays so that no eval / word-splitting is needed.
readonly -a SSH_CMD=(ssh -p "$PORT")

# Set by parse_args: "--dry-run" or empty.
DRY_RUN=""
# Built by build_rsync_cmd once DRY_RUN is known.
RSYNC_CMD=()

# Print usage to stderr and exit with status 2.
usage() {
  printf 'Usage: %s [--dry-run]\n' "${0##*/}" >&2
  exit 2
}

# Parse the command line. Anything other than a single optional --dry-run
# is rejected, so a mistyped flag can never trigger a live transfer.
parse_args() {
  (( $# <= 1 )) || usage
  case "${1:-}" in
    "") ;;
    --dry-run)
      DRY_RUN="--dry-run"
      echo "DRY RUN - no files will be transferred"
      echo ""
      ;;
    *) usage ;;
  esac
}

# Assemble the rsync argv prefix: rsync -avz --progress [--dry-run] -e "ssh -p PORT".
build_rsync_cmd() {
  RSYNC_CMD=(rsync -avz --progress)
  if [[ -n "$DRY_RUN" ]]; then
    RSYNC_CMD+=("$DRY_RUN")
  fi
  RSYNC_CMD+=(-e "$RSYNC_SSH")
}

# Run one sync step.
# Arguments: $1 - step title for the log
#            $2 - destination path relative to REMOTE_DIR
#            $3... - local source paths
sync_step() {
  local title=$1 dest=$2
  shift 2
  echo "-- $title --"
  "${RSYNC_CMD[@]}" "$@" "$GPU:$REMOTE_DIR/$dest"
  echo ""
}

# Check on the GPU host that the critical files exist and that args.py
# matches `o.downsample`, then reinstall opf (editable). All checks run;
# the function returns non-zero if any of them failed.
verify_remote() {
  echo ""
  echo "-- Post-sync verification --"
  # ssh joins its arguments and hands them to the remote login shell, which
  # performs the tilde expansion of REMOTE_DIR; the script then gets the
  # expanded path as $1. REMOTE_DIR must therefore not contain whitespace or
  # shell metacharacters. The quoted delimiter keeps the here-document free
  # of any local expansion.
  "${SSH_CMD[@]}" "$GPU" "bash -s -- $REMOTE_DIR" <<'REMOTE_SCRIPT'
set -u
root=$1
status=0

echo "Checking critical files exist..."
for f in \
    "$root/data/processed/r8_5class_train.jsonl" \
    "$root/data/processed/r8_5class_valid.jsonl" \
    "$root/data/processed/aptner_5class_test_clean.jsonl" \
    "$root/data/processed/securebert2_5class_test.jsonl" \
    "$root/scripts/run_train_v8.sh" \
    "$root/data/label_spaces/cyner_5class.json"; do
  if [ -f "$f" ]; then
    echo "  OK $f"
  else
    echo "  MISSING: $f"
    status=1
  fi
done
echo ""

echo "Checking --o-downsample in vendor code..."
# The "." matches any single character, so both "o-downsample" and
# "o_downsample" satisfy the check.
if grep -q -- 'o.downsample' "$root/vendor/privacy-filter/opf/_train/args.py"; then
  echo "  OK --o-downsample flag present in args.py"
else
  echo "  FAILED: --o-downsample NOT found - vendor sync failed!"
  status=1
fi
echo ""

echo "Reinstalling opf (editable)..."
# stdin is redirected so pip cannot consume the rest of this script, which
# bash -s is reading from stdin.
if (cd "$root/vendor/privacy-filter" && pip install -e . -q </dev/null); then
  echo "  OK opf reinstalled"
else
  echo "  FAILED: opf reinstall failed" >&2
  status=1
fi

exit "$status"
REMOTE_SCRIPT
}

main() {
  parse_args "$@"
  build_rsync_cmd

  echo "================================================================"
  echo " Arcspan R8 Sync -> GPU ($GPU:$PORT)"
  echo "================================================================"
  echo ""

  sync_step "[1/4] Vendor code (modified _train/ files)" \
    "vendor/privacy-filter/opf/_train/" \
    "$LOCAL_DIR/vendor/privacy-filter/opf/_train/runner.py" \
    "$LOCAL_DIR/vendor/privacy-filter/opf/_train/args.py"

  sync_step "[2/4] Data files (R8 train/valid + APTNER + SB2 test)" \
    "data/processed/" \
    "$LOCAL_DIR/data/processed/r8_5class_train.jsonl" \
    "$LOCAL_DIR/data/processed/r8_5class_valid.jsonl" \
    "$LOCAL_DIR/data/processed/aptner_5class_test_clean.jsonl" \
    "$LOCAL_DIR/data/processed/securebert2_5class_test.jsonl"

  sync_step "[3/4] Scripts (R8 training + utilities)" \
    "scripts/" \
    "$LOCAL_DIR/scripts/run_train_v8.sh" \
    "$LOCAL_DIR/scripts/build_dataset.py" \
    "$LOCAL_DIR/scripts/viterbi_grid_search.py" \
    "$LOCAL_DIR/scripts/early_stop_monitor.sh" \
    "$LOCAL_DIR/scripts/checkpoint_avg.py"

  # Trailing slash on the source: copy the directory's contents, not the
  # directory itself, into the remote label_spaces/ directory.
  sync_step "[4/4] Label spaces" \
    "data/label_spaces/" \
    "$LOCAL_DIR/data/label_spaces/"

  echo "================================================================"
  if [[ -n "$DRY_RUN" ]]; then
    echo " DRY RUN COMPLETE - re-run without --dry-run to transfer"
  else
    echo " SYNC COMPLETE"
  fi
  echo "================================================================"

  # Nothing was transferred in a dry run, so there is nothing to verify.
  if [[ -z "$DRY_RUN" ]]; then
    verify_remote
  fi
}

main "$@"