Spaces:

Rom89823974978
/

RAG_Eval

Sleeping

App Files Files Community

RAG_Eval / evaluation /config.py

Rom89823974978

Resolved tests issues

79bdbbe 6 months ago

raw

history blame contribute delete

3.13 kB

	"""Typed configuration objects used across the framework."""

	from dataclasses import dataclass, field
	from pathlib import Path
	from typing import Optional, Literal, Union


	@dataclass
	class LoggingConfig:
	    """Logging configuration (rotating file + console).

	    This is a plain settings container; it performs no I/O itself.
	    """

	    log_dir: Path = Path("logs")  # directory that holds the log files
	    level: str = "INFO"  # DEBUG | INFO | WARNING | ERROR | CRITICAL
	    max_mb: int = 5  # per-file size before rotation
	    backups: int = 5  # number of rotated files to keep

	    def __post_init__(self) -> None:
	        # Values loaded from text-based configs arrive as strings; normalise
	        # them so the attribute always honours its declared ``Path`` type.
	        if isinstance(self.log_dir, str):
	            self.log_dir = Path(self.log_dir)


	@dataclass
	class CrossEncoderConfig:
	    """Configuration for an optional cross-encoder re-ranker.

	    The re-ranker is disabled by default; every other field only matters
	    once ``enable`` is switched on.
	    """

	    enable: bool = False  # master switch
	    model_name: str = "cross-encoder/ms-marco-MiniLM-L-6-v2"
	    device: str = "cpu"
	    max_length: int = 512  # truncation length
	    first_stage_k: int = 50  # how many docs to pass to re-ranker
	    # ``None`` means "no override"; a value is meant to override
	    # PipelineConfig.retriever.top_k.
	    final_k: Optional[int] = None


	@dataclass
	class RetrieverConfig:
	    """Configuration for a retriever back-end.

	    ``name`` selects the back-end ("bm25", "dense" or "hybrid"); the
	    remaining fields are grouped by the back-end they apply to.

	    ``index_path`` is a legacy alias for ``bm25_idx``: when it is set to a
	    truthy value, ``__post_init__`` copies it into ``bm25_idx``, replacing
	    any value passed for ``bm25_idx`` directly.
	    """

	    name: Literal["bm25", "dense", "hybrid"] = "bm25"
	    top_k: int = 5

	    # For backward compatibility with tests: allow index_path alias for sparse
	    index_path: Optional[Union[str, Path]] = None  # alias for bm25_idx

	    # Specific to BM25
	    bm25_idx: Optional[Union[str, Path]] = None
	    doc_store: Optional[Union[str, Path]] = None

	    # For dense-only
	    faiss_index: Optional[Union[str, Path]] = None
	    model_name: str = "sentence-transformers/all-MiniLM-L6-v2"
	    embedder_cache: Optional[Union[str, Path]] = None
	    device: str = "cpu"

	    # For hybrid only
	    alpha: float = 0.5  # sparse <-> dense weight

	    def __post_init__(self) -> None:
	        # If index_path is provided (legacy), use it as bm25_idx. The
	        # truthiness test is deliberate: ``None`` and "" both leave
	        # bm25_idx untouched.
	        if self.index_path:
	            self.bm25_idx = self.index_path


	@dataclass
	class GeneratorConfig:
	    """Configuration for the text generator.

	    Holds the model identifier, the target device and the decoding
	    settings as plain values.
	    """

	    model_name: str = "google/flan-t5-base"
	    device: str = "cpu"
	    max_new_tokens: int = 256
	    temperature: float = 0.0


	@dataclass
	class StatsConfig:
	    """Configuration for statistical tests & robustness analyses.

	    Fields are grouped by the research question (RQ) they serve.
	    """

	    # Correlation (RQ1 & RQ2)
	    correlation_method: Literal["spearman", "kendall"] = "spearman"
	    n_boot: int = 1000  # bootstrap replicates for CIs
	    ci: float = 0.95  # confidence level (e.g. 0.95 = 95 %)

	    # Significance tests (RQ2)
	    wilcoxon_alternative: Literal["two-sided", "greater", "less"] = "two-sided"
	    multiple_correction: Literal["holm-bonferroni", "none"] = "holm-bonferroni"
	    alpha: float = 0.05  # family-wise error rate

	    # Robustness / sensitivity (RQ3 & RQ4)
	    compute_effect_size: bool = True
	    n_permutations: int = 1000
	    failure_threshold: float = 0.0


	@dataclass
	class PipelineConfig:
	    """Top-level pipeline configuration.

	    Bundles one config object per pipeline component. Each field uses a
	    ``default_factory`` so every ``PipelineConfig`` instance gets its own
	    freshly built sub-config rather than sharing one mutable default.
	    """

	    logging: LoggingConfig = field(default_factory=LoggingConfig)
	    reranker: CrossEncoderConfig = field(default_factory=CrossEncoderConfig)
	    retriever: RetrieverConfig = field(default_factory=RetrieverConfig)
	    generator: GeneratorConfig = field(default_factory=GeneratorConfig)
	    stats: StatsConfig = field(default_factory=StatsConfig)