File size: 8,660 Bytes
1ef386c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 |
```shell
# ---------------------------------------------------------------------------
# Sweep configuration (consumed by the training / evaluation loop below).
# ---------------------------------------------------------------------------

# Sweep axes: layers to probe, grid sizes to test, and probing locations.
TRAIN_LAYERS=(
  7
  15
  23
)
TEST_GRID_SIZES=(
  7
  9
  11
  13
  15
)
# "suffix" = pre-reasoning, "output" = post-reasoning.
PROBING_LOCATION_NAMES=(
  "suffix"
  "output"
)
PROBE_TYPES=(
  "mlp"
  "lr"
)

# Training inputs and where trained probes are written.
TRAJECTORIES_TRAIN_PATH="reveng/trajectories_train_single_step"
ACTIVATIONS_TRAIN_PATH="interp/activations_train_single_step"
PROBES_PATH="interp/probes_train_single_step"

# Test inputs and where evaluation reports / predictions are written.
TRAJECTORIES_TEST_PATH="reveng/trajectories_test_full"
ACTIVATION_TEST_PATH="interp/activations_test_full"
TRAJECTORIES_OUTPUT_PATH="reveng/trajectories_test_full_with_probes"

# Hyper-parameters forwarded verbatim to `interp-cli train_cognitive_map_probe`.
# MLP_PROBE_HIDDEN_DIMS is passed as --hidden-dims for every probe type.
MLP_PROBE_HIDDEN_DIMS="1024"
PROBE_LEARNING_RATE="3e-4"
PROBE_WEIGHT_DECAY="0.001"
PROBE_DROPOUT="0.0"
PROBE_NUM_EPOCHS="50"
PROBE_BATCH_SIZE="2048"
PROBE_EVAL_SPLIT="0.005" # passed as --eval-split; presumably a held-out fraction (confirm)
# ~10x minority classes for general probes (36k train grids)
PROBE_PER_CLASS_MAX_COUNT="357000"
# ---------------------------------------------------------------------------
# Probe sweep driver.
#
# For every layer in TRAIN_LAYERS and every location in PROBING_LOCATION_NAMES:
#   1. prepare training activations (one general set plus one set per entry
#      of TEST_GRID_SIZES);
#   2. for every probe type in PROBE_TYPES, train / evaluate / apply a general
#      probe, then train / evaluate / apply one probe per grid size.
#
# Reads the configuration variables assigned earlier in this script
# (TRAIN_LAYERS, TEST_GRID_SIZES, PROBING_LOCATION_NAMES, PROBE_TYPES, the
# *_PATH variables, MLP_PROBE_HIDDEN_DIMS and the PROBE_* settings).
# All real work is delegated to `interp-cli`.
#
# Failure policy: a failed step is reported on stderr and counted; a probe
# whose training failed is neither evaluated nor applied (that would use a
# missing or stale probe file). The sweep itself keeps going, and
# run_probe_sweep returns non-zero if anything failed. No `set -e` / `exit`
# is used so the snippet stays safe to paste into an interactive shell.
# ---------------------------------------------------------------------------

# Report a failed step on stderr and count it.
# `failures` is the local counter declared in run_probe_sweep; bash's dynamic
# scoping makes the caller's local visible (and writable) here.
_sweep_fail() {
  printf 'ERROR: %s\n' "$*" >&2
  failures=$((failures + 1))
}

# Prepare grid_tile probing activations for one layer.
# Arguments: $1 trajectories dir, $2 activations dir, $3 layer,
#            $4 index-selection flag (--prompt_suffix_indices | --output_indices)
_prepare_activations() {
  local traj_dir=$1 act_dir=$2 layer=$3 index_flag=$4
  interp-cli prepare_activations_for_probing \
    --trajectories-dir "$traj_dir" \
    --activations-dir "$act_dir" \
    --probe-type grid_tile \
    --layers "$layer" \
    "$index_flag" all
}

# Train one probe with the shared hyper-parameters.
# Arguments: $1 training data file, $2 probe output file, $3 probe type
_train_probe() {
  local train_data=$1 probe_path=$2 probe_type=$3
  interp-cli train_cognitive_map_probe \
    --train-data-path "$train_data" \
    --output-path "$probe_path" \
    --model-type "$probe_type" \
    --hidden-dims "$MLP_PROBE_HIDDEN_DIMS" \
    --learning-rate "$PROBE_LEARNING_RATE" \
    --weight-decay "$PROBE_WEIGHT_DECAY" \
    --dropout "$PROBE_DROPOUT" \
    --num-epochs "$PROBE_NUM_EPOCHS" \
    --batch-size "$PROBE_BATCH_SIZE" \
    --eval-split "$PROBE_EVAL_SPLIT" \
    --per-class-max-count "$PROBE_PER_CLASS_MAX_COUNT" \
    --device cuda \
    --verbose \
    --normalize \
    --subset 1.0 \
    --balance-classes
}

# Evaluate one probe; writes <dir>/<base>.json (via --output_path) and mirrors
# the command's stdout into <dir>/<base>.txt.
# Arguments: $1 trajectories dir, $2 activations dir, $3 probe file, $4 layer,
#            $5 pad-to-size, $6 report dir, $7 report base name
# Returns:   the exit status of interp-cli (not of tee).
# NOTE(review): --output-indices all is used for every probing location,
# including "suffix" probes — confirm that is intended.
_eval_probe() {
  local traj_dir=$1 act_dir=$2 probe_path=$3 layer=$4 pad_size=$5
  local out_dir=$6 out_base=$7
  mkdir -p "$out_dir" || return
  interp-cli eval_cognitive_map_probe \
    --trajectories-dir "$traj_dir" \
    --activations-dir "$act_dir" \
    --probe-path "$probe_path" \
    --layers "$layer" \
    --steps all \
    --output-indices all \
    --pad-to-size "$pad_size" \
    --output_path "${out_dir}/${out_base}.json" \
    --verbose \
    | tee "${out_dir}/${out_base}.txt"
  # Without this the pipeline would report tee's status and hide eval errors.
  return "${PIPESTATUS[0]}"
}

# Write probe predictions for one set of test trajectories.
# Arguments: $1 trajectories dir, $2 activations dir, $3 probe file,
#            $4 output dir, $5 layer
_apply_probe() {
  local traj_dir=$1 act_dir=$2 probe_path=$3 out_dir=$4 layer=$5
  interp-cli apply_cognitive_map_probe \
    --trajectories-dir "$traj_dir" \
    --activations-dir "$act_dir" \
    --probe-path "$probe_path" \
    --output-dir "$out_dir" \
    --layers "$layer" \
    --steps all \
    --output-indices all
}

# Entry point: run the whole sweep.
# Returns: 0 if every step succeeded, 1 otherwise.
run_probe_sweep() {
  local layer location_name location index_flag size probe_type
  local probe_path eval_dir eval_base
  local failures=0

  for layer in "${TRAIN_LAYERS[@]}"; do
    echo "=== Training and evaluating probes on layer ${layer} activations ==="

    for location_name in "${PROBING_LOCATION_NAMES[@]}"; do
      # The two locations differ only in their label and in which
      # index-selection flag the preparation step receives.
      case "$location_name" in
        suffix)
          location="pre_reasoning"
          index_flag="--prompt_suffix_indices"
          ;;
        output)
          location="post_reasoning"
          index_flag="--output_indices"
          ;;
        *)
          # Previously fell through and trained with a stale/empty label.
          _sweep_fail "unknown probing location '${location_name}'; skipping it"
          continue
          ;;
      esac

      echo "- Preparing padded activations for general probes (${location})"
      _prepare_activations \
        "$TRAJECTORIES_TRAIN_PATH" "$ACTIVATIONS_TRAIN_PATH" \
        "$layer" "$index_flag" \
        || _sweep_fail "preparing general activations failed (layer ${layer}, ${location})"

      for size in "${TEST_GRID_SIZES[@]}"; do
        echo "- Preparing activations for size-specific probes (${location}, grid size ${size}x${size})"
        _prepare_activations \
          "${TRAJECTORIES_TRAIN_PATH}/size${size}" \
          "${ACTIVATIONS_TRAIN_PATH}/size${size}" \
          "$layer" "$index_flag" \
          || _sweep_fail "preparing size ${size} activations failed (layer ${layer}, ${location})"
      done

      echo ""
      echo "Activation preparation completed!"
      echo ""

      for probe_type in "${PROBE_TYPES[@]}"; do
        # ---- General probe (trained on the padded, merged activations) ----
        echo "- Training general ${probe_type} probe on all ${location} activations"
        probe_path="${PROBES_PATH}/cognitive_map_probe_layer${layer}_${probe_type}_${location}_all_general.pt"
        if _train_probe \
          "${ACTIVATIONS_TRAIN_PATH}/cognitive_map_activations_l${layer}_s0_${location_name}_all_grid_tile_pad15_merged.pt" \
          "$probe_path" "$probe_type"; then

          echo "- Evaluating general ${probe_type} probe on all ${location} activations"
          eval_dir="${TRAJECTORIES_OUTPUT_PATH}/layer${layer}/${probe_type}_general/${location}"
          eval_base="eval_cognitive_map_probe_layer${layer}_${probe_type}_${location}_all_general"
          _eval_probe \
            "$TRAJECTORIES_TEST_PATH" "$ACTIVATION_TEST_PATH" \
            "$probe_path" "$layer" 15 "$eval_dir" "$eval_base" \
            || _sweep_fail "evaluating general ${probe_type} probe failed (layer ${layer}, ${location})"

          echo "- Generating predictions for test trajectories for general ${probe_type} probe on all ${location} activations"
          for size in "${TEST_GRID_SIZES[@]}"; do
            _apply_probe \
              "${TRAJECTORIES_TEST_PATH}/size${size}" \
              "${ACTIVATION_TEST_PATH}/size${size}" \
              "$probe_path" \
              "${TRAJECTORIES_OUTPUT_PATH}/layer${layer}/${probe_type}_general/${location}/size${size}" \
              "$layer" \
              || _sweep_fail "applying general ${probe_type} probe failed (layer ${layer}, ${location}, size ${size})"
          done
        else
          _sweep_fail "training general ${probe_type} probe failed (layer ${layer}, ${location}); skipping its evaluation and predictions"
        fi

        # ---- One probe per grid size ----
        # NOTE(review): preparation above targets ${ACTIVATIONS_TRAIN_PATH}/size<N>,
        # while the training file below is read from ${ACTIVATIONS_TRAIN_PATH}
        # itself — confirm where prepare_activations_for_probing writes.
        for size in "${TEST_GRID_SIZES[@]}"; do
          probe_path="${PROBES_PATH}/cognitive_map_probe_layer${layer}_${probe_type}_${location}_all_size${size}.pt"
          echo "- Training ${size}x${size} ${probe_type} probe on all ${location} activations"
          if ! _train_probe \
            "${ACTIVATIONS_TRAIN_PATH}/cognitive_map_activations_l${layer}_s0_${location_name}_all_grid_tile_grid${size}.pt" \
            "$probe_path" "$probe_type"; then
            _sweep_fail "training ${size}x${size} ${probe_type} probe failed (layer ${layer}, ${location}); skipping its evaluation and predictions"
            continue
          fi

          echo "- Evaluating ${size}x${size} ${probe_type} probe on all ${location} activations"
          eval_dir="${TRAJECTORIES_OUTPUT_PATH}/layer${layer}/${probe_type}/${location}/size${size}"
          eval_base="eval_cognitive_map_probe_layer${layer}_${probe_type}_${location}_all_size${size}"
          _eval_probe \
            "${TRAJECTORIES_TEST_PATH}/size${size}" \
            "${ACTIVATION_TEST_PATH}/size${size}" \
            "$probe_path" "$layer" "$size" "$eval_dir" "$eval_base" \
            || _sweep_fail "evaluating ${size}x${size} ${probe_type} probe failed (layer ${layer}, ${location})"

          echo "- Generating predictions for test trajectories for ${size}x${size} ${probe_type} probe on all ${location} activations"
          _apply_probe \
            "${TRAJECTORIES_TEST_PATH}/size${size}" \
            "${ACTIVATION_TEST_PATH}/size${size}" \
            "$probe_path" \
            "${TRAJECTORIES_OUTPUT_PATH}/layer${layer}/${probe_type}/${location}/size${size}" \
            "$layer" \
            || _sweep_fail "applying ${size}x${size} ${probe_type} probe failed (layer ${layer}, ${location})"
        done
      done
    done
  done

  # Exit status 0 only when no step failed.
  ((failures == 0))
}

run_probe_sweep
``` |