Praneshrajan15's picture
Deploy DataForge playground API
eed1cab verified
"""Repairer package for turning detected issues into proposed fixes."""
from __future__ import annotations
from pathlib import Path
from dataforge.detectors.base import Issue, Schema
from dataforge.repairers.base import ProposedFix, RepairAttempt, Repairer, RetryContext
from dataforge.repairers.decimal_shift import DecimalShiftRepairer
from dataforge.repairers.fd_violation import FDViolationRepairer
from dataforge.repairers.type_mismatch import TypeMismatchRepairer
from dataforge.table import TableLike
# Public API of the repairers package: the repairer classes and base types
# imported above, plus the two helper functions defined in this module.
__all__ = [
"DecimalShiftRepairer",
"FDViolationRepairer",
"ProposedFix",
"RepairAttempt",
"Repairer",
"RetryContext",
"TypeMismatchRepairer",
"build_repairers",
"propose_fixes",
]
def build_repairers(
    *,
    cache_dir: Path | None,
    allow_llm: bool,
    model: str,
) -> dict[str, Repairer]:
    """Construct the default repairer registry.

    Args:
        cache_dir: Forwarded to the fd-violation repairer.
        allow_llm: Forwarded to the fd-violation repairer.
        model: Forwarded to the fd-violation repairer.

    Returns:
        A mapping from registry key ("type_mismatch", "decimal_shift",
        "fd_violation") to a freshly constructed repairer instance.
    """
    registry: dict[str, Repairer] = {}
    # These two repairers are constructed without any configuration.
    registry["type_mismatch"] = TypeMismatchRepairer()
    registry["decimal_shift"] = DecimalShiftRepairer()
    # Only the fd-violation repairer receives the cache/LLM settings.
    registry["fd_violation"] = FDViolationRepairer(
        cache_dir=cache_dir,
        allow_llm=allow_llm,
        model=model,
    )
    return registry
def propose_fixes(
    issues: list[Issue],
    df: TableLike,
    schema: Schema | None,
    *,
    cache_dir: Path | None,
    allow_llm: bool = False,
    model: str = "gemini-2.0-flash",
) -> list[ProposedFix]:
    """Run all Week 2 repairers and return proposed fixes.

    Each issue is routed to the repairer registered under its
    ``issue_type``; issues with no registered repairer, and issues for
    which the repairer returns ``None``, are skipped. When several fixes
    target the same (row, column) cell, only the first one produced is
    kept.

    Args:
        issues: Detected issues from the detector layer.
        df: The input DataFrame being repaired.
        schema: Optional declared schema.
        cache_dir: Cache directory for any LLM-backed repair decisions.
        allow_llm: Whether fd-violation repair may call the LLM provider.
        model: The provider model name for fd-violation fallback.

    Returns:
        A deduplicated list of proposed fixes, in the order they were
        first produced.
    """
    repairers = build_repairers(
        cache_dir=cache_dir,
        allow_llm=allow_llm,
        model=model,
    )

    # Keyed by target cell; dict insertion order keeps the output in
    # first-produced order, and setdefault makes the first fix per cell win.
    fixes_by_cell: dict[tuple[int, str], ProposedFix] = {}
    for issue in issues:
        handler = repairers.get(issue.issue_type)
        if handler is not None:
            candidate = handler.propose(issue, df, schema, retry_context=None)
            if candidate is not None:
                cell = (candidate.fix.row, candidate.fix.column)
                fixes_by_cell.setdefault(cell, candidate)
    return list(fixes_by_cell.values())