# bragee's picture
# Upload model checkpoints and code
# b464490 verified
# Multi-Manifold Retrieval - Default Configuration
# Global seed value (presumably used to initialise random number generators
# for reproducibility - confirm against the code that loads this file).
seed: 42
# Encoder settings
# The keys below are nested under `encoder` so the section loads as a single
# mapping instead of a null `encoder` followed by stray top-level keys.
encoder:
  model_name: "sentence-transformers/all-MiniLM-L6-v2"
  embedding_dim: 384
  freeze: true  # Freeze pretrained encoders
# Cross-manifold operator (Construction C)
# The keys below are nested under `cross_manifold` so the section loads as a
# single mapping instead of a null key followed by stray top-level keys.
cross_manifold:
  num_heads: 4
  head_dim: 96  # embedding_dim / num_heads (384 / 4 with the values in this file)
  value_mlp_hidden: 256
  value_mlp_layers: 2
  dropout: 0.1
# Training
# The keys below are nested under `training` so the section loads as a single
# mapping instead of a null key followed by stray top-level keys.
training:
  batch_size: 64
  # Both floats use a decimal point and a signed exponent, which YAML 1.1
  # loaders (e.g. PyYAML) require in order to resolve them as floats.
  learning_rate: 2.0e-4
  weight_decay: 1.0e-2
  epochs: 5
  warmup_steps: 500
  max_train_samples: 100000
  num_negatives: 7
  max_seq_length: 128
  fp16: true
  gradient_accumulation_steps: 1
  log_every: 100
  eval_every: 2000
  save_dir: "checkpoints"
# Evaluation
# The keys below are nested under `evaluation` so the section loads as a
# single mapping instead of a null key followed by stray top-level keys.
evaluation:
  max_eval_queries: 5000
  # Metric identifiers are plain strings; `@` is only special to YAML at the
  # start of a scalar, so these need no quoting.
  metrics:
    - mrr@10
    - recall@100
# Spectral analysis
# The keys below are nested under `spectral` so the section loads as a single
# mapping instead of a null key followed by stray top-level keys.
spectral:
  num_documents: 1000
  num_queries: 500
  k_neighbors: 20  # For sparse Laplacian (optional)
# Attack simulation
# The keys below are nested under `attack` so the section loads as a single
# mapping instead of a null key followed by stray top-level keys.
attack:
  target_domain: "medical"
  num_target_queries: 100
  top_k: 10
  # Keyword list for the "medical" target domain named above.
  medical_keywords:
    - "health"
    - "medical"
    - "doctor"
    - "patient"
    - "treatment"
    - "disease"
    - "symptom"
    - "diagnosis"
    - "medicine"
    - "clinical"