File size: 1,181 Bytes
b464490
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# Multi-Manifold Retrieval - Default Configuration

# NOTE(review): presumably seeds the random number generators for
# reproducibility — confirm which generators the consuming code sets.
seed: 42

# Encoder settings
# Names the pretrained text encoder and records its embedding width.
encoder:
  # NOTE(review): the "sentence-transformers/" prefix looks like a
  # Hugging Face Hub model ID — confirm how the loader resolves it.
  model_name: "sentence-transformers/all-MiniLM-L6-v2"
  # cross_manifold.head_dim is derived from this value by hand
  # (384 / num_heads 4 = 96); update both together.
  # NOTE(review): should equal the output width of model_name — verify
  # against the model card when changing the encoder.
  embedding_dim: 384
  freeze: true  # Freeze pretrained encoders

# Cross-manifold operator (Construction C)
# Hyperparameters for the cross-manifold operator; the code that reads
# them is not part of this file.
cross_manifold:
  num_heads: 4
  # Not computed automatically: this is encoder.embedding_dim (384)
  # divided by num_heads (4). Keep the three values consistent.
  head_dim: 96  # embedding_dim / num_heads
  # NOTE(review): presumably the hidden width and layer count of a value
  # MLP inside the operator — confirm against the model code.
  value_mlp_hidden: 256
  value_mlp_layers: 2
  dropout: 0.1

# Training
# Hyperparameters and bookkeeping settings for a training run.
training:
  batch_size: 64
  # Keep the decimal point and the signed exponent in these two values:
  # YAML 1.1 loaders such as PyYAML read forms like `2e-4` as strings,
  # not floats.
  learning_rate: 2.0e-4
  weight_decay: 1.0e-2
  epochs: 5
  warmup_steps: 500
  # NOTE(review): presumably an upper bound on the number of training
  # examples used — confirm against the data loader.
  max_train_samples: 100000
  # NOTE(review): presumably negatives sampled per query — confirm.
  num_negatives: 7
  # NOTE(review): presumably measured in tokens — confirm.
  max_seq_length: 128
  # NOTE(review): presumably enables half-precision / mixed-precision
  # training — confirm what the trainer does with this flag.
  fp16: true
  gradient_accumulation_steps: 1
  # NOTE(review): the unit of these intervals (optimizer steps vs.
  # batches) is not stated in this file — confirm in the training loop.
  log_every: 100
  eval_every: 2000
  # Relative path; the base directory it resolves against is decided by
  # the consuming code.
  save_dir: "checkpoints"

# Evaluation
# Evaluation size limit and the list of metric names.
evaluation:
  # NOTE(review): presumably caps how many queries are evaluated —
  # confirm against the evaluation code.
  max_eval_queries: 5000
  # Entries are written as "<name>@<cutoff>". The set of accepted names
  # is defined by the evaluation code, not by this file.
  metrics:
    - mrr@10
    - recall@100

# Spectral analysis
# Sample sizes and graph settings for the spectral analysis step.
spectral:
  # NOTE(review): presumably the number of documents and queries sampled
  # for the analysis — confirm against the analysis script.
  num_documents: 1000
  num_queries: 500
  # NOTE(review): presumably the neighbor count of a k-nearest-neighbor
  # graph; "optional" suggests it is read only when the sparse Laplacian
  # is used — confirm.
  k_neighbors: 20  # For sparse Laplacian (optional)

# Attack simulation
# Parameters for a simulated attack aimed at a single target domain.
attack:
  target_domain: "medical"
  num_target_queries: 100
  # NOTE(review): presumably the retrieval cutoff at which the attack is
  # scored — confirm against the attack script.
  top_k: 10
  # Keyword list tied by name to the "medical" target domain.
  # NOTE(review): presumably used to pick target-domain queries or
  # documents by keyword match — confirm. Changing target_domain likely
  # needs a matching keyword list.
  medical_keywords:
    - "health"
    - "medical"
    - "doctor"
    - "patient"
    - "treatment"
    - "disease"
    - "symptom"
    - "diagnosis"
    - "medicine"
    - "clinical"