# iris-at-text2sparql / src/repair.py
# Author: Alex Latipov
# Commit d745844: Harden frozen eval prompts and judge JSON handling
"""Repair module for the Text2SPARQL repair pipeline.
Performs targeted repair on a SPARQL candidate using a single
repair action with LLM assistance. Makes the smallest possible edit.
"""
from __future__ import annotations

import difflib
import logging
import re

from .config import RuntimeConfig
from .generation import normalize_query, parse_generation_output
from .llm import LLMClient
from .models import (
    CandidateQuery,
    ContextPackage,
    CoordinatorDecision,
    ExpertFeedback,
    QueryRequest,
    RepairResult,
    ValidationResult,
)
from .prompts import build_repair_prompt
from .utils import short_hash
logger = logging.getLogger(__name__)
def summarize_diff(old_query: str, new_query: str) -> str:
    """Summarize the difference between old and new queries.

    Aligns the two queries with difflib so that a single inserted or
    removed line does not cascade into every subsequent line being
    reported as changed (which a naive positional comparison would do).

    Args:
        old_query: Original SPARQL query.
        new_query: Repaired SPARQL query.

    Returns:
        Human-readable diff summary string.
    """
    old_lines = old_query.strip().splitlines()
    new_lines = new_query.strip().splitlines()
    if old_lines == new_lines:
        return "No changes."
    # Compare with per-line leading/trailing whitespace ignored.
    old_stripped = [line.strip() for line in old_lines]
    new_stripped = [line.strip() for line in new_lines]
    if old_stripped == new_stripped:
        return "Whitespace-only changes."
    changes: list[str] = []
    matcher = difflib.SequenceMatcher(a=old_stripped, b=new_stripped, autojunk=False)
    for tag, i1, i2, j1, j2 in matcher.get_opcodes():
        if tag == "equal":
            continue
        if tag == "replace":
            # Pair replaced lines positionally; leftovers become adds/removes.
            for offset in range(max(i2 - i1, j2 - j1)):
                old_line = old_stripped[i1 + offset] if i1 + offset < i2 else ""
                new_line = new_stripped[j1 + offset] if j1 + offset < j2 else ""
                if old_line and new_line:
                    changes.append(f" Line {i1 + offset + 1}: '{old_line}' → '{new_line}'")
                elif old_line:
                    changes.append(f" Line {i1 + offset + 1} removed: '{old_line}'")
                else:
                    changes.append(f" Line {j1 + offset + 1} added: '{new_line}'")
        elif tag == "delete":
            for i in range(i1, i2):
                changes.append(f" Line {i + 1} removed: '{old_stripped[i]}'")
        else:  # insert
            for j in range(j1, j2):
                changes.append(f" Line {j + 1} added: '{new_stripped[j]}'")
    if not changes:
        # Defensive: should be unreachable given the equality checks above.
        return "Whitespace-only changes."
    summary = f"{len(changes)} line(s) changed:\n" + "\n".join(changes[:10])
    if len(changes) > 10:
        summary += f"\n ... and {len(changes) - 10} more changes"
    return summary
def _collect_evidence(
    decision: CoordinatorDecision,
    feedbacks: list[ExpertFeedback],
    validation: ValidationResult,
) -> list[str]:
    """Gather the evidence strings relevant to the selected repair action.

    Combines the coordinator's rationale, evidence from every expert that
    did not report "ok", and observations from the validation result.

    Args:
        decision: The coordinator decision with selected action.
        feedbacks: Expert feedback list.
        validation: Validation result.

    Returns:
        List of evidence strings.
    """
    # Start with the coordinator's own rationale.
    collected: list[str] = list(decision.rationale)
    # Only experts that flagged a problem contribute evidence.
    for feedback in feedbacks:
        if feedback.verdict == "ok":
            continue
        collected += feedback.evidence
        if feedback.issue_summary:
            collected.append(f"Expert {feedback.expert_name}: {feedback.issue_summary}")
    # Observations from executing/validating the candidate.
    if validation.execution_error:
        collected.append(f"Execution error: {validation.execution_error}")
    if validation.suspicious_flags:
        collected.append(f"Suspicious flags: {', '.join(validation.suspicious_flags)}")
    if validation.result_count is not None:
        collected.append(f"Result count: {validation.result_count}")
    return collected
def repair_candidate(
    request: QueryRequest,
    candidate: CandidateQuery,
    validation: ValidationResult,
    decision: CoordinatorDecision,
    feedbacks: list[ExpertFeedback],
    context: ContextPackage,
    repair_iteration: int,
    runtime: RuntimeConfig,
    llm: LLMClient,
    prior_queries: list[str] | None = None,
) -> RepairResult:
    """Perform a targeted repair on a candidate query.

    Rules:
    - Repair prompt instructs model to make the smallest possible edit
    - Repair prompt includes only the selected action and relevant evidence
    - Output must be SPARQL only
    - If output is identical to input after normalization, changed = False
    - If the model returns an empty output, the original query is kept
      (changed = False) instead of producing an empty candidate

    Args:
        request: The query request.
        candidate: Current candidate to repair.
        validation: Validation result for this candidate.
        decision: Coordinator decision with selected action.
        feedbacks: Expert feedback list.
        context: Context package.
        repair_iteration: Current repair iteration (0-indexed).
        runtime: Runtime configuration.
        llm: LLM client for repair.
        prior_queries: Previously attempted queries, passed to the prompt so
            the model can avoid repeating failed attempts.

    Returns:
        RepairResult with the repaired candidate.
    """
    # Fall back to a generic relink when the coordinator selected no action.
    action = decision.selected_action or "entity_relink"
    # Collect only the evidence relevant to the selected action
    evidence = _collect_evidence(decision, feedbacks, validation)
    # Syntax fixes have a stricter prompt because they should not rewrite
    # semantically meaningful parts of the query.
    template_key = "syntax_repair" if action == "syntax_fix" else "repair"
    # Build and execute repair prompt
    prompt = build_repair_prompt(
        request.question,
        candidate.query,
        action,
        evidence,
        context,
        prior_queries=prior_queries,
        prompt_files=runtime.prompt_files,
        template_key=template_key,
    )
    raw_output = llm.generate_text(prompt)
    # Extract SPARQL from output
    repaired_queries = parse_generation_output(raw_output)
    if repaired_queries:
        new_query = repaired_queries[0]
    else:
        # If no code block found, try using the raw output as SPARQL
        new_query = raw_output.strip()
        # Remove any markdown artifacts (stray ``` fences)
        new_query = re.sub(r"^```\w*\s*", "", new_query)
        new_query = re.sub(r"\s*```$", "", new_query)
    if not new_query.strip():
        # The model produced nothing usable; keep the original query rather
        # than propagating an empty candidate through the pipeline.
        logger.warning(
            "Repair produced empty output (action=%s); keeping original query", action,
        )
        new_query = candidate.query
    # Check if query actually changed (after normalization, so formatting-only
    # edits do not count as a change)
    old_norm = normalize_query(candidate.query)
    new_norm = normalize_query(new_query)
    changed = old_norm != new_norm
    # Generate diff summary
    diff = summarize_diff(candidate.query, new_query)
    # Create new candidate linked to its parent for lineage tracking
    new_candidate_id = f"repair_{repair_iteration}_{short_hash(new_query)}"
    new_candidate = CandidateQuery(
        candidate_id=new_candidate_id,
        query=new_query,
        source="repair",
        generation_index=candidate.generation_index,
        parent_candidate_id=candidate.candidate_id,
        repair_iteration=repair_iteration + 1,
    )
    logger.info(
        "Repair applied (action=%s, changed=%s): %s → %s",
        action, changed, candidate.candidate_id, new_candidate_id,
    )
    return RepairResult(
        old_candidate_id=candidate.candidate_id,
        new_candidate=new_candidate,
        action_used=action,
        changed=changed,
        diff_summary=diff,
    )