Spaces:
Sleeping
Sleeping
from pathlib import Path
from typing import List

from models import Domain, EmbeddingModel, FaultType
# Directory named "corpora" located one level above the directory that
# contains this file (<this file's dir>/../corpora).
_CORPORA_DIR = Path(__file__).parent.parent / "corpora"
# Task id -> corpus domain for that task.
_TASK_DOMAIN = {
    1: Domain.SOFTWARE,
    2: Domain.CLIMATE,
    3: Domain.MEDICAL,
}
# Task id -> human-readable task description.
# Each value is a single string built from adjacent literals; the trailing
# space inside each fragment is what separates the sentences once joined.
_TASK_DESCRIPTION = {
    1: (
        "Task 1 (Easy — Software): The RAG pipeline has one or two config faults "
        "on a Python-documentation corpus. Diagnose the retrieval degradation and "
        "fix the pipeline configuration to achieve high coverage of relevant chunks."
    ),
    2: (
        "Task 2 (Medium — Climate): The RAG pipeline has compound config faults "
        "on a climate-science corpus. Multiple issues interact; you must fix both "
        "to recover good retrieval quality."
    ),
    3: (
        "Task 3 (Hard — Medical): The pipeline uses the wrong embedding model on a "
        "medical corpus and has additional config faults. Identify the model mismatch "
        "and all config issues; pay special attention to multi-hop queries."
    ),
}
# Per-task episode query budget (task id -> number of queries).
_N_EPISODE_QUERIES = {
    1: 5,
    2: 5,
    3: 5,  # Task 3: 3 regular + 2 multi-hop
}

# NOTE(review): presumably the cap on steps per episode; confirm against the caller.
_MAX_STEPS = 10
# Model name -> numpy file suffix
_MODEL_FILE = {
    EmbeddingModel.GENERAL: "general",
    EmbeddingModel.MEDICAL: "medical",
    EmbeddingModel.LEGAL: "legal",
    EmbeddingModel.CODE: "code",
}
# Fault sets per task
# Task 1: each inner list is one fault set; sets hold one or two faults.
# NOTE(review): how a set is chosen for an episode is not visible here.
_TASK1_FAULT_SETS: List[List[FaultType]] = [
    [FaultType.CHUNK_TOO_LARGE, FaultType.NO_RERANKING],  # compound
    [FaultType.THRESHOLD_TOO_HIGH],
    [FaultType.TOP_K_TOO_SMALL],
    [FaultType.CHUNK_TOO_LARGE],
]
# Task 2: each inner list is one fault set; the first two combine two faults,
# the last two contain a single fault each.
_TASK2_FAULT_SETS: List[List[FaultType]] = [
    [FaultType.THRESHOLD_TOO_LOW, FaultType.DUPLICATE_FLOODING],  # compound
    [FaultType.TOP_K_TOO_SMALL, FaultType.CONTEXT_OVERFLOW],  # compound
    [FaultType.DUPLICATE_FLOODING],
    [FaultType.CONTEXT_OVERFLOW],
]
# Task 3: a single flat list of faults (not a list of alternative sets):
# the embedding-model mismatch plus two config faults.
_TASK3_FAULTS: List[FaultType] = [
    FaultType.WRONG_EMBEDDING_MODEL,
    FaultType.CHUNK_TOO_LARGE,
    FaultType.THRESHOLD_TOO_HIGH,
]