File size: 2,012 Bytes
f23deb1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
from pathlib import Path
from typing import List

from models import Domain, EmbeddingModel, FaultType

# The "corpora" folder sits next to the directory that contains this module.
_CORPORA_DIR = Path(__file__).parent.parent.joinpath("corpora")

# Task id -> domain of that task (the same domain each task description names).
_TASK_DOMAIN = {
    1: Domain.SOFTWARE,
    2: Domain.CLIMATE,
    3: Domain.MEDICAL,
}

# Task id -> human-readable brief (difficulty, domain, what to fix).
# Each value is a single string; adjacent literals are joined by the parser,
# so every fragment except the last must keep its trailing space.
# NOTE(review): the task-2 text says "fix both", but _TASK2_FAULT_SETS also
# contains single-fault sets — confirm the wording is intended.
_TASK_DESCRIPTION = {
    1: (
        "Task 1 (Easy — Software): "
        "The RAG pipeline has one or two config faults "
        "on a Python-documentation corpus. "
        "Diagnose the retrieval degradation and "
        "fix the pipeline configuration to achieve high coverage of relevant chunks."
    ),
    2: (
        "Task 2 (Medium — Climate): "
        "The RAG pipeline has compound config faults "
        "on a climate-science corpus. "
        "Multiple issues interact; "
        "you must fix both to recover good retrieval quality."
    ),
    3: (
        "Task 3 (Hard — Medical): "
        "The pipeline uses the wrong embedding model on a medical corpus "
        "and has additional config faults. "
        "Identify the model mismatch and all config issues; "
        "pay special attention to multi-hop queries."
    ),
}

# Episode query budget, keyed by task id.
_N_EPISODE_QUERIES = {
    1: 5,
    2: 5,
    3: 5,  # Task 3: 3 regular + 2 multi-hop
}
# NOTE(review): presumably the per-episode step cap — confirm against the
# code that reads it.
_MAX_STEPS = 10

# Embedding model -> suffix of the numpy file associated with that model.
_MODEL_FILE = {
    EmbeddingModel.GENERAL: "general",
    EmbeddingModel.MEDICAL: "medical",
    EmbeddingModel.LEGAL: "legal",
    EmbeddingModel.CODE: "code",
}

# Fault sets for task 1; each inner list is one combination of faults.
# NOTE(review): presumably one inner list is selected per episode — confirm
# against the code that consumes this table.
_TASK1_FAULT_SETS: List[List[FaultType]] = [
    # Compound set: two faults at once.
    [
        FaultType.CHUNK_TOO_LARGE,
        FaultType.NO_RERANKING,
    ],
    # Single-fault sets.
    [FaultType.THRESHOLD_TOO_HIGH],
    [FaultType.TOP_K_TOO_SMALL],
    [FaultType.CHUNK_TOO_LARGE],
]

# Fault sets for task 2; each inner list is one combination of faults.
_TASK2_FAULT_SETS: List[List[FaultType]] = [
    # Compound sets: two faults at once.
    [
        FaultType.THRESHOLD_TOO_LOW,
        FaultType.DUPLICATE_FLOODING,
    ],
    [
        FaultType.TOP_K_TOO_SMALL,
        FaultType.CONTEXT_OVERFLOW,
    ],
    # Single-fault sets.
    [FaultType.DUPLICATE_FLOODING],
    [FaultType.CONTEXT_OVERFLOW],
]

# Faults for task 3: a single flat list, unlike the list-of-lists tables
# used for tasks 1 and 2.
_TASK3_FAULTS: List[FaultType] = [
    FaultType.WRONG_EMBEDDING_MODEL,  # the model mismatch named in the task-3 text
    FaultType.CHUNK_TOO_LARGE,  # additional config fault
    FaultType.THRESHOLD_TOO_HIGH,  # additional config fault
]