"""
All paths centralized. The DATA_ROOT environment variable overrides the default.

Default layout (relative to project root):

    studentsimulation/
    └── data/
        ├── models/
        ├── cots/
        ├── routing/
        ├── activations/
        ├── checkpoints/
        ├── logs/
        └── results/

Importing this module only computes paths; nothing is created on disk until
``ensure_dirs()`` is called.
"""

import os
from pathlib import Path

# Two levels above this file: <project root>/<package dir>/<this file>.
PROJECT_ROOT = Path(__file__).resolve().parent.parent


def _resolve_data_root(environ=os.environ) -> Path:
    """Return the absolute data root directory.

    Args:
        environ: Mapping consulted for the ``DATA_ROOT`` key. Defaults to
            ``os.environ``; a plain dict can be passed instead.

    Returns:
        The resolved ``DATA_ROOT`` override when it is set and non-empty,
        otherwise ``PROJECT_ROOT / "data"`` (also resolved).
    """
    # An empty override must not count: Path("") is ".", which would silently
    # redirect every artifact into the current working directory.
    override = environ.get("DATA_ROOT")
    if not override:
        return (PROJECT_ROOT / "data").resolve()
    # A leading "~" is not expanded when the value bypasses the shell
    # (quoted value, .env file), so expand it here before resolving.
    return Path(override).expanduser().resolve()


DATA_ROOT = _resolve_data_root()

# Models
MODELS_DIR = DATA_ROOT / "models"

# CoT data
COTS_DIR = DATA_ROOT / "cots"
RAW_COTS_PATH = COTS_DIR / "raw_cots.jsonl"
LABELED_COTS_PATH = COTS_DIR / "labeled_cots.jsonl"

# Test sets
TEST_MATH_PATH = COTS_DIR / "test_math500_holdout.jsonl"
TEST_AIME_PATH = COTS_DIR / "test_aime24.jsonl"
TEST_GPQA_PATH = COTS_DIR / "test_gpqa.jsonl"

# Routing captures (sharded)
ROUTING_DIR = DATA_ROOT / "routing"

# Residual stream captures
ACTIVATIONS_DIR = DATA_ROOT / "activations"
RESIDUALS_PATH = ACTIVATIONS_DIR / "decision_point_residuals.pt"
GENERAL_RESIDUALS_PATH = ACTIVATIONS_DIR / "general_residuals.pt"

# Checkpoints (directions + selected experts)
CHECKPOINTS_DIR = DATA_ROOT / "checkpoints"
TOP_EXPERTS_PLAN_PATH = CHECKPOINTS_DIR / "top_experts_planning.json"
TOP_EXPERTS_MON_PATH = CHECKPOINTS_DIR / "top_experts_monitoring.json"
TARGET_LAYERS_PATH = CHECKPOINTS_DIR / "target_layers.json"
GENERAL_DIR_PATH = CHECKPOINTS_DIR / "general_direction.pt"

# Direction versions (kept only 2 after empirical findings)
PLAN_V1_RAW = CHECKPOINTS_DIR / "planning_v1_raw.pt"
PLAN_V_PCA_SUBSPACE = CHECKPOINTS_DIR / "planning_v_pca_subspace.pt"
MON_V1_RAW = CHECKPOINTS_DIR / "monitoring_v1_raw.pt"
MON_V_PCA_SUBSPACE = CHECKPOINTS_DIR / "monitoring_v_pca_subspace.pt"

# Logs (per-script)
LOGS_DIR = DATA_ROOT / "logs"

# Results
RESULTS_DIR = DATA_ROOT / "results"
ROUTING_HEATMAP_PLAN = RESULTS_DIR / "routing_heatmap_planning.png"
ROUTING_HEATMAP_MON = RESULTS_DIR / "routing_heatmap_monitoring.png"
INTERACTION_HEATMAP = RESULTS_DIR / "interaction_heatmap.png"
DIRECTION_COSINE_MATRIX = RESULTS_DIR / "direction_cosine_matrix.png"
SWEEP_RR_JSON = RESULTS_DIR / "sweep_rr.json"
SWEEP_PQS_JSON = RESULTS_DIR / "sweep_pqs.json"
SWEEP_CURVES_FIG = RESULTS_DIR / "sweep_curves.png"
DOWNSTREAM_ACC_JSON = RESULTS_DIR / "downstream_accuracy.json"
FINAL_REPORT = RESULTS_DIR / "final_report.md"

# Attention diagnostic (Apr 2026)
ATTN_RESIDUALS_PATH = ACTIVATIONS_DIR / "attention_outputs.pt"
ATTN_DIAGNOSTIC_PATH = RESULTS_DIR / "attention_diagnostic.json"
ATTN_DIAGNOSTIC_FIG = RESULTS_DIR / "attention_diagnostic.png"


def ensure_dirs() -> None:
    """Create every top-level data directory that does not exist yet.

    Missing parents are created as well, and directories that already exist
    are left untouched, so calling this repeatedly is harmless.
    """
    for d in [
        MODELS_DIR,
        COTS_DIR,
        ROUTING_DIR,
        ACTIVATIONS_DIR,
        CHECKPOINTS_DIR,
        LOGS_DIR,
        RESULTS_DIR,
    ]:
        d.mkdir(parents=True, exist_ok=True)


if __name__ == "__main__":
    ensure_dirs()
    print(f"PROJECT_ROOT = {PROJECT_ROOT}")
    print(f"DATA_ROOT = {DATA_ROOT}")
    print("All directories ready.")