"""Repairer package for turning detected issues into proposed fixes.""" from __future__ import annotations from pathlib import Path from dataforge.detectors.base import Issue, Schema from dataforge.repairers.base import ProposedFix, RepairAttempt, Repairer, RetryContext from dataforge.repairers.decimal_shift import DecimalShiftRepairer from dataforge.repairers.fd_violation import FDViolationRepairer from dataforge.repairers.type_mismatch import TypeMismatchRepairer from dataforge.table import TableLike __all__ = [ "DecimalShiftRepairer", "FDViolationRepairer", "ProposedFix", "RepairAttempt", "Repairer", "RetryContext", "TypeMismatchRepairer", "build_repairers", "propose_fixes", ] def build_repairers( *, cache_dir: Path | None, allow_llm: bool, model: str, ) -> dict[str, Repairer]: """Construct the default repairer registry.""" return { "type_mismatch": TypeMismatchRepairer(), "decimal_shift": DecimalShiftRepairer(), "fd_violation": FDViolationRepairer( cache_dir=cache_dir, allow_llm=allow_llm, model=model, ), } def propose_fixes( issues: list[Issue], df: TableLike, schema: Schema | None, *, cache_dir: Path | None, allow_llm: bool = False, model: str = "gemini-2.0-flash", ) -> list[ProposedFix]: """Run all Week 2 repairers and return proposed fixes. Args: issues: Detected issues from the detector layer. df: The input DataFrame being repaired. schema: Optional declared schema. cache_dir: Cache directory for any LLM-backed repair decisions. allow_llm: Whether fd-violation repair may call the LLM provider. model: The provider model name for fd-violation fallback. Returns: A deduplicated list of proposed fixes. """ registry = build_repairers( cache_dir=cache_dir, allow_llm=allow_llm, model=model, ) proposed: list[ProposedFix] = [] seen_cells: set[tuple[int, str]] = set() for issue in issues: repairer = registry.get(issue.issue_type) if repairer is None: continue fix = repairer.propose(issue, df, schema, retry_context=None) if fix is None: continue key = (fix.fix.row, fix.fix.column) if key in seen_cells: continue seen_cells.add(key) proposed.append(fix) return proposed