```shell
#!/usr/bin/env bash
#
# Train, evaluate, and apply cognitive-map probes for every combination of
# layer (TRAIN_LAYERS), probing location (PROBING_LOCATION_NAMES), and probe
# type (PROBE_TYPES). For each combination the script:
#   1. prepares training activations (one general set plus one per grid size),
#   2. trains, evaluates, and applies a "general" probe,
#   3. trains, evaluates, and applies one probe per grid size.
#
# Requires bash (arrays) and `interp-cli` on PATH.

# Fail fast: abort on the first failing command, on use of an unset variable,
# and when the left-hand side of a pipeline fails (the eval step pipes into
# `tee`, which would otherwise hide a failed evaluation).
set -euo pipefail

TRAIN_LAYERS=(7 15 23)
TEST_GRID_SIZES=(7 9 11 13 15)
PROBING_LOCATION_NAMES=("suffix" "output") # Pre- and post-reasoning

ACTIVATIONS_TRAIN_PATH="interp/activations_train_single_step"
TRAJECTORIES_TRAIN_PATH="reveng/trajectories_train_single_step"
ACTIVATION_TEST_PATH="interp/activations_test_full"
TRAJECTORIES_TEST_PATH="reveng/trajectories_test_full"
TRAJECTORIES_OUTPUT_PATH="reveng/trajectories_test_full_with_probes"
PROBES_PATH="interp/probes_train_single_step"

PROBE_TYPES=("mlp" "lr")
MLP_PROBE_HIDDEN_DIMS=1024
PROBE_LEARNING_RATE=3e-4
PROBE_WEIGHT_DECAY=0.001
PROBE_NUM_EPOCHS=50
PROBE_BATCH_SIZE=2048
PROBE_DROPOUT=0.0
PROBE_EVAL_SPLIT=0.005
PROBE_PER_CLASS_MAX_COUNT=357000 # ~10x minority classes for general probes (36k train grids)

#######################################
# Prepare grid_tile probing activations for a single layer.
# Arguments:
#   $1 - trajectories directory
#   $2 - activations directory
#   $3 - layer index
#   $4 - index-selection flag (--prompt_suffix_indices or --output_indices);
#        it is always passed the value "all"
#######################################
prepare_activations() {
  local trajectories_dir=$1
  local activations_dir=$2
  local layer_idx=$3
  local index_flag=$4

  interp-cli prepare_activations_for_probing \
    --trajectories-dir "$trajectories_dir" \
    --activations-dir "$activations_dir" \
    --probe-type grid_tile \
    --layers "$layer_idx" \
    "$index_flag" all
}

#######################################
# Train one probe using the shared PROBE_* hyperparameters.
# Globals:
#   MLP_PROBE_HIDDEN_DIMS, PROBE_LEARNING_RATE, PROBE_WEIGHT_DECAY,
#   PROBE_DROPOUT, PROBE_NUM_EPOCHS, PROBE_BATCH_SIZE, PROBE_EVAL_SPLIT,
#   PROBE_PER_CLASS_MAX_COUNT (all read)
# Arguments:
#   $1 - path to the training data (.pt)
#   $2 - path the trained probe is written to
#   $3 - model type (an entry of PROBE_TYPES)
#######################################
train_probe() {
  local train_data=$1
  local probe_path=$2
  local model_type=$3

  interp-cli train_cognitive_map_probe \
    --train-data-path "$train_data" \
    --output-path "$probe_path" \
    --model-type "$model_type" \
    --hidden-dims "$MLP_PROBE_HIDDEN_DIMS" \
    --learning-rate "$PROBE_LEARNING_RATE" \
    --weight-decay "$PROBE_WEIGHT_DECAY" \
    --dropout "$PROBE_DROPOUT" \
    --num-epochs "$PROBE_NUM_EPOCHS" \
    --batch-size "$PROBE_BATCH_SIZE" \
    --eval-split "$PROBE_EVAL_SPLIT" \
    --per-class-max-count "$PROBE_PER_CLASS_MAX_COUNT" \
    --device cuda \
    --verbose \
    --normalize \
    --subset 1.0 \
    --balance-classes
}

#######################################
# Evaluate a probe; results go to <dir>/<base>.json and the console output
# is additionally captured in <dir>/<base>.txt.
# Arguments:
#   $1 - test trajectories directory
#   $2 - test activations directory
#   $3 - probe path
#   $4 - layer index
#   $5 - value for --pad-to-size
#   $6 - output directory (created if missing)
#   $7 - output file base name (without extension)
#######################################
eval_probe() {
  local trajectories_dir=$1
  local activations_dir=$2
  local probe_path=$3
  local layer_idx=$4
  local pad_size=$5
  local output_dir=$6
  local output_base=$7

  mkdir -p "$output_dir"
  # NOTE(review): --output-indices is used for both probing locations, while
  # activation preparation uses --prompt_suffix_indices for "suffix" —
  # confirm this is intended for pre-reasoning probes.
  interp-cli eval_cognitive_map_probe \
    --trajectories-dir "$trajectories_dir" \
    --activations-dir "$activations_dir" \
    --probe-path "$probe_path" \
    --layers "$layer_idx" \
    --steps all \
    --output-indices all \
    --pad-to-size "$pad_size" \
    --output_path "${output_dir}/${output_base}.json" \
    --verbose \
    | tee "${output_dir}/${output_base}.txt"
}

#######################################
# Apply a probe to the test trajectories of one grid size.
# Globals:
#   TRAJECTORIES_TEST_PATH, ACTIVATION_TEST_PATH (read)
# Arguments:
#   $1 - grid size (selects the size<N> test subdirectories)
#   $2 - probe path
#   $3 - output directory for the predictions
#   $4 - layer index
#######################################
apply_probe() {
  local grid_size=$1
  local probe_path=$2
  local output_dir=$3
  local layer_idx=$4

  # NOTE(review): same --output-indices question as in eval_probe.
  interp-cli apply_cognitive_map_probe \
    --trajectories-dir "${TRAJECTORIES_TEST_PATH}/size${grid_size}" \
    --activations-dir "${ACTIVATION_TEST_PATH}/size${grid_size}" \
    --probe-path "$probe_path" \
    --output-dir "$output_dir" \
    --layers "$layer_idx" \
    --steps all \
    --output-indices all
}

for layer in "${TRAIN_LAYERS[@]}"; do
  echo "=== Training and evaluating probes on layer ${layer} activations ==="

  for location_name in "${PROBING_LOCATION_NAMES[@]}"; do
    # Map the probing location onto its human-readable label and onto the
    # index flag used when preparing activations.
    # NOTE(review): these two flags use underscores while most other flags
    # use hyphens; kept exactly as in the original — confirm the CLI accepts
    # both spellings.
    case "$location_name" in
      suffix)
        location="pre_reasoning"
        index_flag="--prompt_suffix_indices"
        ;;
      output)
        location="post_reasoning"
        index_flag="--output_indices"
        ;;
      *)
        # Previously an unknown name silently skipped preparation and reused
        # whatever `location` held from the prior iteration.
        echo "error: unknown probing location '${location_name}'" >&2
        exit 1
        ;;
    esac

    echo "- Preparing padded activations for general probes (${location})"
    prepare_activations \
      "$TRAJECTORIES_TRAIN_PATH" \
      "$ACTIVATIONS_TRAIN_PATH" \
      "$layer" \
      "$index_flag"

    for size in "${TEST_GRID_SIZES[@]}"; do
      echo "- Preparing activations for size-specific probes (${location}, grid size ${size}x${size})"
      prepare_activations \
        "${TRAJECTORIES_TRAIN_PATH}/size${size}" \
        "${ACTIVATIONS_TRAIN_PATH}/size${size}" \
        "$layer" \
        "$index_flag"
    done

    echo ""
    echo "Activation preparation completed!"
    echo ""

    for probe_type in "${PROBE_TYPES[@]}"; do
      # ---- General probe: trained on the merged, padded activations ----
      echo "- Training general ${probe_type} probe on all ${location} activations"
      CURRENT_PROBE_PATH="${PROBES_PATH}/cognitive_map_probe_layer${layer}_${probe_type}_${location}_all_general.pt"
      train_probe \
        "${ACTIVATIONS_TRAIN_PATH}/cognitive_map_activations_l${layer}_s0_${location_name}_all_grid_tile_pad15_merged.pt" \
        "$CURRENT_PROBE_PATH" \
        "$probe_type"

      echo "- Evaluating general ${probe_type} probe on all ${location} activations"
      EVAL_OUTPUT_DIR="${TRAJECTORIES_OUTPUT_PATH}/layer${layer}/${probe_type}_general/${location}"
      EVAL_OUTPUT_BASE="eval_cognitive_map_probe_layer${layer}_${probe_type}_${location}_all_general"
      # Pad size 15 matches the "pad15" training file above (presumably the
      # largest entry of TEST_GRID_SIZES — confirm before changing either).
      eval_probe \
        "$TRAJECTORIES_TEST_PATH" \
        "$ACTIVATION_TEST_PATH" \
        "$CURRENT_PROBE_PATH" \
        "$layer" \
        15 \
        "$EVAL_OUTPUT_DIR" \
        "$EVAL_OUTPUT_BASE"

      echo "- Generating predictions for test trajectories for general ${probe_type} probe on all ${location} activations"
      for size in "${TEST_GRID_SIZES[@]}"; do
        apply_probe \
          "$size" \
          "$CURRENT_PROBE_PATH" \
          "${TRAJECTORIES_OUTPUT_PATH}/layer${layer}/${probe_type}_general/${location}/size${size}" \
          "$layer"
      done

      # ---- Size-specific probes: one probe per grid size ----
      for size in "${TEST_GRID_SIZES[@]}"; do
        CURRENT_PROBE_PATH="${PROBES_PATH}/cognitive_map_probe_layer${layer}_${probe_type}_${location}_all_size${size}.pt"

        echo "- Training ${size}x${size} ${probe_type} probe on all ${location} activations"
        # NOTE(review): the training file is read from the top-level
        # activations directory although the size-specific activations were
        # prepared with --activations-dir .../size${size}; verify where the
        # prepare step writes its output.
        train_probe \
          "${ACTIVATIONS_TRAIN_PATH}/cognitive_map_activations_l${layer}_s0_${location_name}_all_grid_tile_grid${size}.pt" \
          "$CURRENT_PROBE_PATH" \
          "$probe_type"

        echo "- Evaluating ${size}x${size} ${probe_type} probe on all ${location} activations"
        EVAL_OUTPUT_DIR="${TRAJECTORIES_OUTPUT_PATH}/layer${layer}/${probe_type}/${location}/size${size}"
        EVAL_OUTPUT_BASE="eval_cognitive_map_probe_layer${layer}_${probe_type}_${location}_all_size${size}"
        eval_probe \
          "${TRAJECTORIES_TEST_PATH}/size${size}" \
          "${ACTIVATION_TEST_PATH}/size${size}" \
          "$CURRENT_PROBE_PATH" \
          "$layer" \
          "$size" \
          "$EVAL_OUTPUT_DIR" \
          "$EVAL_OUTPUT_BASE"

        echo "- Generating predictions for test trajectories for ${size}x${size} ${probe_type} probe on all ${location} activations"
        apply_probe \
          "$size" \
          "$CURRENT_PROBE_PATH" \
          "${TRAJECTORIES_OUTPUT_PATH}/layer${layer}/${probe_type}/${location}/size${size}" \
          "$layer"
      done
    done
  done
done
```