---
# Multi-Manifold Retrieval - Default Configuration
#
# Top-level stanzas: seed, encoder, cross_manifold, training, evaluation,
# spectral, attack. Nested keys use 2-space block indentation.

seed: 42

# Encoder settings
encoder:
  model_name: "sentence-transformers/all-MiniLM-L6-v2"
  embedding_dim: 384
  freeze: true  # Freeze pretrained encoders

# Cross-manifold operator (Construction C)
cross_manifold:
  num_heads: 4
  # embedding_dim / num_heads = 384 / 4; keep in sync when either changes.
  head_dim: 96
  value_mlp_hidden: 256
  value_mlp_layers: 2
  dropout: 0.1

# Training
training:
  batch_size: 64
  # Keep the decimal point and the signed exponent on the two floats below:
  # YAML 1.1 loaders such as PyYAML read a bare `2e-4` as a string.
  learning_rate: 2.0e-4
  weight_decay: 1.0e-2
  epochs: 5
  warmup_steps: 500
  max_train_samples: 100000
  num_negatives: 7
  max_seq_length: 128
  fp16: true
  gradient_accumulation_steps: 1
  # NOTE(review): log_every / eval_every are presumably counted in training
  # steps - confirm against the training loop.
  log_every: 100
  eval_every: 2000
  save_dir: "checkpoints"

# Evaluation
evaluation:
  max_eval_queries: 5000
  metrics:
    - "mrr@10"
    - "recall@100"

# Spectral analysis
spectral:
  num_documents: 1000
  num_queries: 500
  k_neighbors: 20  # For sparse Laplacian (optional)

# Attack simulation
attack:
  target_domain: "medical"
  num_target_queries: 100
  top_k: 10
  # NOTE(review): nesting under `attack` was reconstructed from key order;
  # confirm the consumer reads attack.medical_keywords.
  medical_keywords:
    - "health"
    - "medical"
    - "doctor"
    - "patient"
    - "treatment"
    - "disease"
    - "symptom"
    - "diagnosis"
    - "medicine"
    - "clinical"