# YAML Metadata Warning: empty or missing yaml metadata in repo card
# (https://huggingface.co/docs/hub/model-cards#model-card-metadata)
# ---------------------------------------------------------------------------
# Experiment grid: every combination of these lists is processed by the
# nested loops further down in this script.
# ---------------------------------------------------------------------------
TRAIN_LAYERS=(
  7
  15
  23
)
TEST_GRID_SIZES=(
  7
  9
  11
  13
  15
)
PROBING_LOCATION_NAMES=(suffix output) # Pre- and post-reasoning
PROBE_TYPES=(mlp lr)

# ---------------------------------------------------------------------------
# Directories. Training inputs come from the single-step runs, test inputs
# from the full runs; probes and probe-annotated trajectories are written out.
# ---------------------------------------------------------------------------
TRAJECTORIES_TRAIN_PATH="reveng/trajectories_train_single_step"
ACTIVATIONS_TRAIN_PATH="interp/activations_train_single_step"
TRAJECTORIES_TEST_PATH="reveng/trajectories_test_full"
ACTIVATION_TEST_PATH="interp/activations_test_full"
PROBES_PATH="interp/probes_train_single_step"
TRAJECTORIES_OUTPUT_PATH="reveng/trajectories_test_full_with_probes"

# ---------------------------------------------------------------------------
# Probe training settings, forwarded unchanged as options of
# `interp-cli train_cognitive_map_probe`.
# ---------------------------------------------------------------------------
MLP_PROBE_HIDDEN_DIMS=1024
PROBE_DROPOUT=0.0
PROBE_LEARNING_RATE=3e-4
PROBE_WEIGHT_DECAY=0.001
PROBE_BATCH_SIZE=2048
PROBE_NUM_EPOCHS=50
PROBE_EVAL_SPLIT=0.005
# ~10x minority classes for general probes (36k train grids)
PROBE_PER_CLASS_MAX_COUNT=357000
# ---------------------------------------------------------------------------
# Probe pipeline: for every layer and probing location, prepare activations,
# then train, evaluate and apply a general probe plus one probe per grid size
# for every probe type.
# ---------------------------------------------------------------------------

# Fail fast. Previously a failed preparation or training step was silently
# followed by evaluation/application of a missing or stale probe, and the
# `| tee` pipelines masked the exit status of the evaluation command.
set -euo pipefail

#######################################
# Prepare probing activations: once from the top-level training directories
# (used by the general probes) and once per grid size from the size${N}
# sub-directories.
# Globals:   layer, location, TEST_GRID_SIZES, TRAJECTORIES_TRAIN_PATH,
#            ACTIVATIONS_TRAIN_PATH (all read)
# Arguments: $1 - index-selection flag forwarded to the CLI
#                 (--prompt_suffix_indices or --output_indices)
#######################################
prepare_probe_activations() {
  local indices_flag=$1
  local size

  echo "- Preparing padded activations for general probes (${location})"
  interp-cli prepare_activations_for_probing \
    --trajectories-dir "${TRAJECTORIES_TRAIN_PATH}" \
    --activations-dir "${ACTIVATIONS_TRAIN_PATH}" \
    --probe-type grid_tile \
    --layers "${layer}" \
    "${indices_flag}" all

  for size in "${TEST_GRID_SIZES[@]}"; do
    echo "- Preparing activations for size-specific probes (${location}, grid size ${size}x${size})"
    interp-cli prepare_activations_for_probing \
      --trajectories-dir "${TRAJECTORIES_TRAIN_PATH}/size${size}" \
      --activations-dir "${ACTIVATIONS_TRAIN_PATH}/size${size}" \
      --probe-type grid_tile \
      --layers "${layer}" \
      "${indices_flag}" all
  done
}

#######################################
# Train one probe and write it to CURRENT_PROBE_PATH.
# Globals:   CURRENT_PROBE_PATH, probe_type, MLP_PROBE_HIDDEN_DIMS and the
#            PROBE_* hyperparameters (all read)
# Arguments: $1 - path of the prepared training activations file
#######################################
train_probe() {
  local train_data_path=$1
  # Split on whitespace without globbing, so a configuration that lists
  # several hidden dims in one string still yields one argument per dim
  # (the original relied on unquoted word splitting for this).
  local -a hidden_dims
  read -r -a hidden_dims <<<"${MLP_PROBE_HIDDEN_DIMS}"

  interp-cli train_cognitive_map_probe \
    --train-data-path "${train_data_path}" \
    --output-path "${CURRENT_PROBE_PATH}" \
    --model-type "${probe_type}" \
    --hidden-dims "${hidden_dims[@]}" \
    --learning-rate "${PROBE_LEARNING_RATE}" \
    --weight-decay "${PROBE_WEIGHT_DECAY}" \
    --dropout "${PROBE_DROPOUT}" \
    --num-epochs "${PROBE_NUM_EPOCHS}" \
    --batch-size "${PROBE_BATCH_SIZE}" \
    --eval-split "${PROBE_EVAL_SPLIT}" \
    --per-class-max-count "${PROBE_PER_CLASS_MAX_COUNT}" \
    --device cuda \
    --verbose \
    --normalize \
    --subset 1.0 \
    --balance-classes
}

#######################################
# Evaluate the probe at CURRENT_PROBE_PATH on test data. The JSON report and
# a copy of the console output (.txt) are written to EVAL_OUTPUT_DIR.
# Globals:   layer, CURRENT_PROBE_PATH, EVAL_OUTPUT_DIR, EVAL_OUTPUT_BASE,
#            TRAJECTORIES_TEST_PATH, ACTIVATION_TEST_PATH (all read)
# Arguments: $1 - sub-directory of the test paths ("" for the top level)
#            $2 - value for --pad-to-size
#######################################
evaluate_probe() {
  local subdir=$1
  local pad_size=$2
  # Append "/<subdir>" only when a sub-directory was requested.
  local suffix=${subdir:+/${subdir}}

  mkdir -p "${EVAL_OUTPUT_DIR}"
  # With pipefail set, a failing evaluation is no longer hidden by tee.
  interp-cli eval_cognitive_map_probe \
    --trajectories-dir "${TRAJECTORIES_TEST_PATH}${suffix}" \
    --activations-dir "${ACTIVATION_TEST_PATH}${suffix}" \
    --probe-path "${CURRENT_PROBE_PATH}" \
    --layers "${layer}" \
    --steps all \
    --output-indices all \
    --pad-to-size "${pad_size}" \
    --output_path "${EVAL_OUTPUT_DIR}/${EVAL_OUTPUT_BASE}.json" \
    --verbose \
    | tee "${EVAL_OUTPUT_DIR}/${EVAL_OUTPUT_BASE}.txt"
}

#######################################
# Apply the probe at CURRENT_PROBE_PATH to the test trajectories of one grid
# size.
# Globals:   layer, CURRENT_PROBE_PATH, TRAJECTORIES_TEST_PATH,
#            ACTIVATION_TEST_PATH (all read)
# Arguments: $1 - grid size (selects the size${N} test sub-directories)
#            $2 - output directory passed as --output-dir
#######################################
apply_probe() {
  local grid_size=$1
  local out_dir=$2

  interp-cli apply_cognitive_map_probe \
    --trajectories-dir "${TRAJECTORIES_TEST_PATH}/size${grid_size}" \
    --activations-dir "${ACTIVATION_TEST_PATH}/size${grid_size}" \
    --probe-path "${CURRENT_PROBE_PATH}" \
    --output-dir "${out_dir}" \
    --layers "${layer}" \
    --steps all \
    --output-indices all
}

# NOTE(review): CLI flag spellings are kept exactly as they were, including
# the mix of underscores (--prompt_suffix_indices, --output_indices,
# --output_path) and hyphens (--output-indices) - confirm interp-cli accepts
# both forms.
# NOTE(review): evaluation and application always pass `--output-indices all`,
# also for the pre_reasoning ("suffix") probes - confirm this is intended.
# NOTE(review): size-specific activations are prepared under size${N}
# sub-directories but the training files are read from the top level of
# ACTIVATIONS_TRAIN_PATH - presumably the CLI writes them there; verify.

# Make sure the probe directory exists before training writes into it
# (harmless if interp-cli creates it on its own).
mkdir -p "${PROBES_PATH}"

for layer in "${TRAIN_LAYERS[@]}"; do
  echo "=== Training and evaluating probes on layer ${layer} activations ==="
  for location_name in "${PROBING_LOCATION_NAMES[@]}"; do
    # Map the on-disk location name to the label used in probe/output names.
    case "${location_name}" in
      suffix)
        location="pre_reasoning"
        prepare_probe_activations --prompt_suffix_indices
        ;;
      output)
        location="post_reasoning"
        prepare_probe_activations --output_indices
        ;;
      *)
        # Previously an unknown name fell through and reused a stale (or
        # unset) ${location}; refuse to continue instead.
        printf 'Unknown probing location name: %s\n' "${location_name}" >&2
        exit 1
        ;;
    esac
    echo ""
    echo "Activation preparation completed!"
    echo ""

    for probe_type in "${PROBE_TYPES[@]}"; do
      # --- General probe ---------------------------------------------------
      echo "- Training general ${probe_type} probe on all ${location} activations"
      CURRENT_PROBE_PATH="${PROBES_PATH}/cognitive_map_probe_layer${layer}_${probe_type}_${location}_all_general.pt"
      train_probe "${ACTIVATIONS_TRAIN_PATH}/cognitive_map_activations_l${layer}_s0_${location_name}_all_grid_tile_pad15_merged.pt"

      echo "- Evaluating general ${probe_type} probe on all ${location} activations"
      EVAL_OUTPUT_DIR="${TRAJECTORIES_OUTPUT_PATH}/layer${layer}/${probe_type}_general/${location}"
      EVAL_OUTPUT_BASE="eval_cognitive_map_probe_layer${layer}_${probe_type}_${location}_all_general"
      evaluate_probe "" 15

      echo "- Generating predictions for test trajectories for general ${probe_type} probe on all ${location} activations"
      for size in "${TEST_GRID_SIZES[@]}"; do
        apply_probe "${size}" "${TRAJECTORIES_OUTPUT_PATH}/layer${layer}/${probe_type}_general/${location}/size${size}"
      done

      # --- Size-specific probes ---------------------------------------------
      for size in "${TEST_GRID_SIZES[@]}"; do
        CURRENT_PROBE_PATH="${PROBES_PATH}/cognitive_map_probe_layer${layer}_${probe_type}_${location}_all_size${size}.pt"
        echo "- Training ${size}x${size} ${probe_type} probe on all ${location} activations"
        train_probe "${ACTIVATIONS_TRAIN_PATH}/cognitive_map_activations_l${layer}_s0_${location_name}_all_grid_tile_grid${size}.pt"

        echo "- Evaluating ${size}x${size} ${probe_type} probe on all ${location} activations"
        EVAL_OUTPUT_DIR="${TRAJECTORIES_OUTPUT_PATH}/layer${layer}/${probe_type}/${location}/size${size}"
        EVAL_OUTPUT_BASE="eval_cognitive_map_probe_layer${layer}_${probe_type}_${location}_all_size${size}"
        evaluate_probe "size${size}" "${size}"

        echo "- Generating predictions for test trajectories for ${size}x${size} ${probe_type} probe on all ${location} activations"
        apply_probe "${size}" "${TRAJECTORIES_OUTPUT_PATH}/layer${layer}/${probe_type}/${location}/size${size}"
      done
    done
  done
done
# Inference Providers (NEW)
# This model isn't deployed by any Inference Provider.
# 🙋 Ask for provider support