"""Typed configuration objects used across the framework."""

from dataclasses import dataclass, field
from pathlib import Path
from typing import Optional, Literal, Union


@dataclass
class LoggingConfig:
    """Settings for logging to a rotating file and to the console.

    Attributes:
        log_dir: Path used for log output; defaults to the relative
            path ``logs``.
        level: Log level name, one of ``DEBUG``, ``INFO``, ``WARNING``,
            ``ERROR`` or ``CRITICAL``. The annotation is a plain ``str``
            and nothing in this class validates the value.
        max_mb: Per-file size reached before the file is rotated
            (the name suggests megabytes).
        backups: Number of rotated files to keep.
    """

    log_dir: Path = Path("logs")
    level: str = "INFO"
    max_mb: int = 5
    backups: int = 5


@dataclass
class CrossEncoderConfig:
    """Settings for the optional cross-encoder re-ranking stage.

    Attributes:
        enable: Master switch for the re-ranker; off by default.
        model_name: Identifier of the cross-encoder model.
        device: Device string for the model; defaults to ``"cpu"``.
        max_length: Truncation length.
        first_stage_k: How many documents are passed to the re-ranker.
        final_k: Optional override for ``PipelineConfig.retriever.top_k``;
            defaults to ``None``.
    """

    enable: bool = False
    model_name: str = "cross-encoder/ms-marco-MiniLM-L-6-v2"
    device: str = "cpu"
    max_length: int = 512
    first_stage_k: int = 50
    final_k: Optional[int] = None


@dataclass
class RetrieverConfig:
    """Settings for a retriever back-end (``bm25``, ``dense`` or ``hybrid``).

    Attributes:
        name: Which back-end to use; defaults to ``"bm25"``.
        top_k: Defaults to 5 (presumably the number of results returned;
            confirm against the retriever implementation).
        index_path: Legacy alias for ``bm25_idx``, kept for backward
            compatibility with tests. A truthy value is copied onto
            ``bm25_idx`` after initialisation.
        bm25_idx: BM25-specific index location.
        doc_store: BM25-specific document store location.
        faiss_index: Dense-retrieval index location.
        model_name: Embedding model identifier for dense retrieval.
        embedder_cache: Optional cache location for the embedder.
        device: Device string for the embedding model.
        alpha: Sparse/dense weight used by the hybrid back-end.
    """

    name: Literal["bm25", "dense", "hybrid"] = "bm25"
    top_k: int = 5

    # Legacy spelling accepted for backward compatibility with tests;
    # see __post_init__ for how it maps onto bm25_idx.
    index_path: Optional[Union[str, Path]] = None

    # --- BM25 (sparse) settings ---
    bm25_idx: Optional[Union[str, Path]] = None
    doc_store: Optional[Union[str, Path]] = None

    # --- Dense settings ---
    faiss_index: Optional[Union[str, Path]] = None
    model_name: str = "sentence-transformers/all-MiniLM-L6-v2"
    embedder_cache: Optional[Union[str, Path]] = None
    device: str = "cpu"

    # --- Hybrid settings ---
    alpha: float = 0.5

    def __post_init__(self) -> None:
        """Copy a truthy legacy ``index_path`` onto ``bm25_idx``."""
        legacy_path = self.index_path
        if not legacy_path:
            # None or an empty string: leave bm25_idx exactly as given.
            return
        # The legacy alias wins over any value passed as bm25_idx.
        self.bm25_idx = legacy_path


@dataclass
class GeneratorConfig:
    """Settings for the text generator.

    Attributes:
        model_name: Identifier of the generator model; defaults to
            ``"google/flan-t5-base"``.
        device: Device string for the model; defaults to ``"cpu"``.
        max_new_tokens: Defaults to 256 (presumably the cap on generated
            tokens; confirm against the generator implementation).
        temperature: Defaults to 0.0 (presumably the sampling
            temperature; confirm against the generator implementation).
    """

    model_name: str = "google/flan-t5-base"
    device: str = "cpu"
    max_new_tokens: int = 256
    temperature: float = 0.0


@dataclass
class StatsConfig:
    """Settings for statistical tests and robustness analyses.

    Attributes:
        correlation_method: Correlation used for RQ1 and RQ2, either
            ``"spearman"`` or ``"kendall"``.
        n_boot: Number of bootstrap replicates for confidence intervals.
        ci: Confidence level (e.g. 0.95 = 95 %).
        wilcoxon_alternative: Alternative hypothesis for the RQ2
            significance tests: ``"two-sided"``, ``"greater"`` or
            ``"less"``.
        multiple_correction: Multiple-comparison correction, either
            ``"holm-bonferroni"`` or ``"none"``.
        alpha: Family-wise error rate.
        compute_effect_size: Flag for the RQ3/RQ4 analyses; on by default.
        n_permutations: Defaults to 1000 (presumably the number of
            permutations drawn; confirm against the consumer).
        failure_threshold: Defaults to 0.0 (meaning not shown in this
            file; confirm against the consumer).
    """

    # --- Correlation (RQ1 & RQ2) ---
    correlation_method: Literal["spearman", "kendall"] = "spearman"
    n_boot: int = 1000
    ci: float = 0.95

    # --- Significance tests (RQ2) ---
    wilcoxon_alternative: Literal["two-sided", "greater", "less"] = "two-sided"
    multiple_correction: Literal["holm-bonferroni", "none"] = "holm-bonferroni"
    alpha: float = 0.05

    # --- Robustness / sensitivity (RQ3 & RQ4) ---
    compute_effect_size: bool = True
    n_permutations: int = 1000
    failure_threshold: float = 0.0


@dataclass
class PipelineConfig:
    """Root configuration object that groups every per-component config.

    Each attribute is created through ``default_factory``, so every
    ``PipelineConfig`` instance gets its own fresh sub-config objects
    rather than sharing one default instance.

    Attributes:
        logging: Logging settings (``LoggingConfig``).
        reranker: Cross-encoder re-ranker settings (``CrossEncoderConfig``).
        retriever: Retriever back-end settings (``RetrieverConfig``).
        generator: Text generator settings (``GeneratorConfig``).
        stats: Statistical analysis settings (``StatsConfig``).
    """

    logging: LoggingConfig = field(default_factory=LoggingConfig)
    reranker: CrossEncoderConfig = field(default_factory=CrossEncoderConfig)
    retriever: RetrieverConfig = field(default_factory=RetrieverConfig)
    generator: GeneratorConfig = field(default_factory=GeneratorConfig)
    stats: StatsConfig = field(default_factory=StatsConfig)