#!/bin/bash
#
# Deploy the ASCAD training pipeline sources to a remote worker over SSH.
#
# Usage:
#   <script> <ssh_host> <ssh_port> [--restart-worker]
#
# Steps performed:
#   1. Check SSH connectivity to root@<ssh_host> on <ssh_port>.
#   2. Compute MD5 checksums of the local *.py / *.sh files in src/ and scripts/.
#   3. Copy src/, scripts/ and, when present, train_mtl.py / worker_agent.py.
#   4. Remove __pycache__ directories on the worker.
#   5. Recompute the checksums on the worker and compare them with the local ones.
#   6. Run a short dry-run evaluation test on the worker.
#   7. With --restart-worker: stop and relaunch the worker agent in a screen
#      session.
#
# Exit codes:
#   1  usage error or checksum verification failure
#   2  dry-run test failed on the worker
#   3  worker not reachable over SSH

set -euo pipefail

# ANSI colour escapes; the logging helpers expand the backslash sequences.
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly NC='\033[0m'

# Positional arguments. ${N:?msg} aborts with the usage text when a required
# argument is missing or empty.
readonly USAGE="Usage: $0 <ssh_host> <ssh_port> [--restart-worker]"
SSH_HOST="${1:?${USAGE}}"
SSH_PORT="${2:?${USAGE}}"
RESTART_WORKER="${3:-}"

# Reject a malformed port up front instead of failing later inside ssh/scp.
case "${SSH_PORT}" in
  *[!0-9]*)
    printf '%s\n' "Invalid SSH port: '${SSH_PORT}'" "${USAGE}" >&2
    exit 1
    ;;
esac

# A misspelled flag must not be silently treated as "no restart requested".
case "${RESTART_WORKER}" in
  '' | --restart-worker) ;;
  *)
    printf '%s\n' "Unknown option: '${RESTART_WORKER}'" "${USAGE}" >&2
    exit 1
    ;;
esac

# Source tree on this machine and its destination on the worker.
readonly LOCAL_DIR="/home/ubuntu/ascad-training-pipeline"
readonly REMOTE_DIR="/root/ascad-training-pipeline"

# Options shared by every ssh/scp call. Kept as a plain string that call sites
# expand unquoted so it word-splits into separate options.
# NOTE(review): StrictHostKeyChecking=no disables host key verification.
readonly SSH_OPTS="-o StrictHostKeyChecking=no -o ConnectTimeout=15"
|
|
# Logging helpers. Each message is rendered through printf's %b so that
# backslash escapes in the colour variables and in the message itself
# (e.g. "\n") are expanded.

# Print an informational progress message to stdout.
# Arguments: all arguments are joined with spaces to form the message.
log() { printf '%b\n' "${GREEN}[DEPLOY]${NC} $*"; }

# Print a non-fatal warning to stderr.
# Arguments: all arguments are joined with spaces to form the message.
warn() { printf '%b\n' "${YELLOW}[WARN]${NC} $*" >&2; }

# Print an error message to stderr and terminate the script.
# Arguments:
#   $1 - message
#   $2 - exit status (optional, defaults to 1)
# Only $1 is printed: the exit status is not part of the message.
fail() {
  printf '%b\n' "${RED}[FAIL]${NC} ${1:-}" >&2
  exit "${2:-1}"
}
|
|
| |
# Confirm the worker is reachable before doing any other work.
log "Testing SSH connectivity to ${SSH_HOST}:${SSH_PORT}..."
# ssh's output is captured rather than discarded so that, when the probe
# fails, its diagnostic (timeout, refused, auth error, ...) can be shown.
# SSH_OPTS is intentionally unquoted: it must split into separate options.
# shellcheck disable=SC2086
if ! ssh_probe_output=$(ssh ${SSH_OPTS} -p "${SSH_PORT}" "root@${SSH_HOST}" "echo OK" 2>&1); then
  if [ -n "${ssh_probe_output}" ]; then
    warn "ssh reported: ${ssh_probe_output}"
  fi
  fail "Cannot connect to ${SSH_HOST}:${SSH_PORT}" 3
fi
log "SSH connection OK"
|
|
| |
# Hash every *.py / *.sh file under src/ and scripts/ (paths relative to
# LOCAL_DIR) so the copy on the worker can be verified afterwards.
log "Computing local checksums..."
# - NUL-delimited hand-off (-print0 / sort -z / xargs -0) keeps paths that
#   contain whitespace intact.
# - "! -type d" skips directories whose name happens to match the patterns.
# - xargs -r does not run md5sum at all when nothing was found.
LOCAL_CHECKSUMS=$(
  cd "${LOCAL_DIR}" &&
    find src/ scripts/ \( -name '*.py' -o -name '*.sh' \) ! -type d -print0 |
    sort -z |
    xargs -0 -r md5sum
)
if [ -z "${LOCAL_CHECKSUMS}" ]; then
  fail "No *.py or *.sh files found under ${LOCAL_DIR}/src or ${LOCAL_DIR}/scripts" 1
fi
# Keep a copy on disk for manual inspection.
printf '%s\n' "${LOCAL_CHECKSUMS}" > /tmp/deploy_local_checksums.txt
# One md5sum line per file; the list is known to be non-empty here.
LOCAL_COUNT=$(grep -c '' <<< "${LOCAL_CHECKSUMS}")
log "Found ${LOCAL_COUNT} files to deploy"
|
|
| |
# Push both source trees to the worker; scp -r places each directory inside
# REMOTE_DIR under its own name.
log "Deploying src/ and scripts/ to worker..."
for tree in src scripts; do
  # SSH_OPTS is intentionally unquoted so it splits into separate options.
  # shellcheck disable=SC2086
  scp ${SSH_OPTS} -P "${SSH_PORT}" -r "${LOCAL_DIR}/${tree}" "root@${SSH_HOST}:${REMOTE_DIR}/"
done

# The top-level entry points are optional: copy only those that exist locally.
for entry_point in train_mtl.py worker_agent.py; do
  [ -f "${LOCAL_DIR}/${entry_point}" ] || continue
  # shellcheck disable=SC2086
  scp ${SSH_OPTS} -P "${SSH_PORT}" "${LOCAL_DIR}/${entry_point}" "root@${SSH_HOST}:${REMOTE_DIR}/"
done
log "Files transferred"
|
|
| |
# Remove __pycache__ directories below REMOTE_DIR on the worker.
log "Clearing __pycache__ on worker..."
# -prune: there is no point walking into a directory that is about to be
# deleted. Errors are no longer discarded: if the cleanup fails, ssh returns
# non-zero and `set -e` aborts instead of reporting a cleanup that never ran.
# SSH_OPTS is intentionally unquoted; REMOTE_DIR is expanded locally on purpose.
# shellcheck disable=SC2086,SC2029
ssh ${SSH_OPTS} -p "${SSH_PORT}" "root@${SSH_HOST}" \
  "find '${REMOTE_DIR}' -type d -name '__pycache__' -prune -exec rm -rf {} + && echo 'Cleared'"
log "__pycache__ cleared"
|
|
| |
# Hash the deployed *.py / *.sh files on the worker, using paths relative to
# REMOTE_DIR so they line up with the locally computed list.
log "Verifying remote checksums..."
# NUL-delimited hand-off keeps paths with whitespace intact, "! -type d" skips
# directories whose name matches the patterns, and xargs -r avoids running
# md5sum with no arguments when nothing is found.
# SSH_OPTS is intentionally unquoted; REMOTE_DIR is expanded locally on purpose.
# shellcheck disable=SC2086,SC2029
REMOTE_CHECKSUMS=$(ssh ${SSH_OPTS} -p "${SSH_PORT}" "root@${SSH_HOST}" \
  "cd '${REMOTE_DIR}' && find src/ scripts/ \( -name '*.py' -o -name '*.sh' \) ! -type d -print0 | sort -z | xargs -0 -r md5sum")
# Keep a copy on disk for manual inspection.
printf '%s\n' "${REMOTE_CHECKSUMS}" > /tmp/deploy_remote_checksums.txt
|
|
| |
# Compare two `md5sum` listings file by file.
#
# Paths are compared as exact strings. A regex/suffix match would let
# "src/a.py" also hit "scripts/src/a.py" (and "." match any character),
# producing false mismatches.
#
# Arguments:
#   $1 - local md5sum output, one "<md5>  <path>" entry per line
#   $2 - remote md5sum output in the same format
# Outputs:
#   one warn() line for every local file that is missing or differs remotely
# Returns:
#   0 when every local entry has an identical remote entry, 1 otherwise
compare_checksums() {
  local local_sums=$1 remote_sums=$2
  local local_md5 local_file remote_md5
  local mismatch=0

  # read puts the first field in local_md5 and the rest of the line (the
  # path, which may contain spaces) in local_file.
  while read -r local_md5 local_file; do
    [ -n "${local_file}" ] || continue

    # Print the hash of the remote entry whose path equals local_file exactly.
    remote_md5=$(awk -v want="${local_file}" '
      {
        path = $0
        sub(/^[^ \t]+[ \t]+/, "", path)
        if (path == want) { print $1; exit }
      }
    ' <<< "${remote_sums}")

    if [ -z "${remote_md5}" ]; then
      warn "File missing on worker: ${local_file}"
      mismatch=1
    elif [ "${local_md5}" != "${remote_md5}" ]; then
      warn "Checksum mismatch: ${local_file} (local=${local_md5}, remote=${remote_md5})"
      mismatch=1
    fi
  done <<< "${local_sums}"

  return "${mismatch}"
}

# Abort the deployment if any local file is absent or different on the worker.
MISMATCH=0
compare_checksums "${LOCAL_CHECKSUMS}" "${REMOTE_CHECKSUMS}" || MISMATCH=1
if [ "${MISMATCH}" -eq 1 ]; then
  fail "Checksum verification failed! See warnings above." 1
fi
log "All ${LOCAL_COUNT} local files verified — checksums match on worker"
|
|
| |
# Generate the dry-run test that will be executed on the worker. The script:
#   - imports src.constants and indexes AES_SBOX,
#   - pulls one per-byte prediction array out of a dict of predictions,
#   - calls src.evaluation.compute_key_rank on synthetic metadata,
#   - builds a small 16-output Keras model and calls
#     src.evaluation.evaluate_model for bytes 0, 7 and 15,
#   - prints "=== ALL TESTS PASSED ===" when every assertion held.
# The heredoc delimiter is quoted, so the shell expands nothing inside it.
log "Running dry-run evaluation test on worker..."

cat << 'PYEOF' > /tmp/test_eval_dryrun.py
"""Quick dry-run test for evaluation code on the worker."""
import sys
import os
import numpy as np

sys.path.insert(0, "/root/ascad-training-pipeline")
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"

from src.constants import AES_SBOX, NUM_CLASSES

sbox_out = AES_SBOX[np.uint8(42) ^ np.uint8(0)]
assert isinstance(sbox_out, (int, np.integer)), f"sbox_out type wrong: {type(sbox_out)}"
print(f"[PASS] AES_SBOX indexing: type={type(sbox_out).__name__}, val={sbox_out}")

raw_predictions = {f"byte_{i}": np.random.rand(10, 256).astype(np.float32) for i in range(16)}
output_index = 0
key = f"byte_{output_index}"
predictions = raw_predictions[key]
assert predictions.shape == (10, 256), f"Shape wrong: {predictions.shape}"
assert isinstance(predictions[0][sbox_out], (float, np.floating)), f"Indexing wrong: {type(predictions[0][sbox_out])}"
print(f"[PASS] Dict extraction: predictions shape={predictions.shape}")

from src.evaluation import compute_key_rank
metadata_dtype = np.dtype([('plaintext', np.uint8, (16,)), ('key', np.uint8, (16,))])
metadata = np.zeros(10, dtype=metadata_dtype)
for i in range(10):
    metadata[i]['plaintext'] = np.random.randint(0, 256, 16, dtype=np.uint8)
    metadata[i]['key'] = np.array([0x4D] * 16, dtype=np.uint8)

ranks_array, final_rank = compute_key_rank(
    predictions=predictions, metadata=metadata,
    real_key=0x4D, target_byte=0, num_traces=10, rank_step=5
)
assert isinstance(final_rank, int), f"final_rank type wrong: {type(final_rank)}"
print(f"[PASS] compute_key_rank: final_rank={final_rank}")

import tensorflow as tf
inp = tf.keras.Input(shape=(100, 1))
x = tf.keras.layers.Flatten()(inp)
x = tf.keras.layers.Dense(32, activation='relu')(x)
outputs = {}
for byte_idx in range(16):
    outputs[f"byte_{byte_idx}"] = tf.keras.layers.Dense(256, activation='softmax', name=f"byte_{byte_idx}")(x)
model = tf.keras.Model(inputs=inp, outputs=outputs, name="test_hps")

from src.evaluation import evaluate_model
attack_traces = np.random.rand(20, 100).astype(np.float32)
attack_metadata = np.zeros(20, dtype=metadata_dtype)
for i in range(20):
    attack_metadata[i]['plaintext'] = np.random.randint(0, 256, 16, dtype=np.uint8)
    attack_metadata[i]['key'] = np.array([0x4D] * 16, dtype=np.uint8)

for byte_idx in [0, 7, 15]:
    result = evaluate_model(
        model=model, attack_traces=attack_traces, attack_metadata=attack_metadata,
        target_byte=byte_idx, real_key=0x4D, model_type="mtan",
        num_traces=20, output_index=byte_idx
    )
    assert 'final_rank' in result, f"Missing final_rank for byte {byte_idx}"
    print(f"[PASS] evaluate_model byte_{byte_idx}: final_rank={result['final_rank']}")

print("\n=== ALL TESTS PASSED ===")
PYEOF
|
|
# Ship the generated test script to the worker.
# SSH_OPTS is intentionally unquoted so it splits into separate options.
# shellcheck disable=SC2086
scp ${SSH_OPTS} -P "${SSH_PORT}" /tmp/test_eval_dryrun.py "root@${SSH_HOST}:/tmp/test_eval_dryrun.py"

# Run it from the pipeline directory, capturing stdout and stderr together.
# The exit status is collected with `||`: under `set -e` a bare failing
# assignment would abort the script right here, before the test output could
# be printed and before the dedicated exit code 2 could be used.
TEST_STATUS=0
# shellcheck disable=SC2086,SC2029
TEST_OUTPUT=$(ssh ${SSH_OPTS} -p "${SSH_PORT}" "root@${SSH_HOST}" \
  "cd ${REMOTE_DIR} && python3 /tmp/test_eval_dryrun.py 2>&1") || TEST_STATUS=$?
printf '%s\n' "${TEST_OUTPUT}"

# Success requires a zero exit status and the final marker line. The marker is
# matched in-shell rather than through `echo | grep -q`, which can report a
# spurious failure under pipefail if grep exits before echo finishes writing.
if [ "${TEST_STATUS}" -eq 0 ] && [[ "${TEST_OUTPUT}" == *"ALL TESTS PASSED"* ]]; then
  log "Dry-run test PASSED on worker"
else
  if [ "${TEST_STATUS}" -ne 0 ]; then
    warn "Dry-run command exited with status ${TEST_STATUS}"
  fi
  fail "Dry-run test FAILED on worker:\n${TEST_OUTPUT}" 2
fi
|
|
| |
# Optional final step: stop the running worker / training processes and
# relaunch the agent inside a detached screen session named "worker".
if [ "${RESTART_WORKER}" = "--restart-worker" ]; then
  log "Restarting worker agent..."

  # Stop phase: SIGTERM first, SIGKILL for anything still alive afterwards.
  #
  # The patterns are written with bracket expressions ("[w]orker...") on
  # purpose. This whole snippet is handed to the remote shell as a command-line
  # argument, and `pkill -f` matches against full command lines. A plain
  # "worker_agent.py" would therefore also match the remote shell running this
  # snippet and kill the SSH session. "[w]orker_agent[.]py" matches the real
  # process but not its own literal text.
  #
  # The module name the agent is launched with below is included as well, so
  # an agent started by a previous run of this script is actually stopped.
  # SSH_OPTS is intentionally unquoted so it splits into separate options.
  # shellcheck disable=SC2086,SC2029
  ssh ${SSH_OPTS} -p "${SSH_PORT}" "root@${SSH_HOST}" "
    procs='[w]orker_agent[.]py|[t]rain_mtl[.]py|[o]rchestrator[.]worker[.]agent'
    pkill -f \"\${procs}\" 2>/dev/null || true
    sleep 2

    # Verify processes are dead
    if pgrep -f \"\${procs}\" > /dev/null 2>&1; then
      pkill -9 -f \"\${procs}\" 2>/dev/null || true
      sleep 1
    fi
    echo 'Processes killed'
  "

  # Start phase. Variables escaped as \${...} are expanded on the worker (by
  # the shell screen starts), not locally; ${REMOTE_DIR} is expanded locally.
  # NOTE(review): the auth password is passed on the agent's command line and
  # is therefore visible in the worker's process list.
  # shellcheck disable=SC2086,SC2029
  ssh ${SSH_OPTS} -p "${SSH_PORT}" "root@${SSH_HOST}" "
    cd ${REMOTE_DIR} || exit 1
    screen -dmS worker bash -c '
      python3 -m orchestrator.worker.agent \
        --server-url \"\${TQ_SERVER_URL}\" \
        --worker-id \"\${WORKER_ID:-worker-\$(hostname)}\" \
        --data-dir /root/ascad_data \
        --pipeline-dir ${REMOTE_DIR} \
        --auth-user \"\${TQ_AUTH_USER:-admin}\" \
        --auth-pass \"\${TQ_AUTH_PASS}\" \
        --forward-logs /root/worker.log \
        2>&1 | tee -a /root/worker.log
    '
    sleep 2
    if screen -list | grep -q worker; then
      echo 'Worker started in screen session'
    else
      echo 'ERROR: Worker failed to start'
      exit 1
    fi
  "
  log "Worker agent restarted"
fi
|
|
# Closing banner. Reaching this point means no earlier step aborted the script.
echo ""
for banner_line in \
  "=========================================" \
  " DEPLOYMENT COMPLETE — ALL CHECKS PASSED" \
  "========================================="; do
  log "${banner_line}"
done
|
|