| """Repair module for the Text2SPARQL repair pipeline. | |
| Performs targeted repair on a SPARQL candidate using a single | |
| repair action with LLM assistance. Makes the smallest possible edit. | |
| """ | |
| from __future__ import annotations | |
| import logging | |
| import re | |
| from .config import RuntimeConfig | |
| from .generation import normalize_query, parse_generation_output | |
| from .llm import LLMClient | |
| from .models import ( | |
| CandidateQuery, | |
| ContextPackage, | |
| CoordinatorDecision, | |
| ExpertFeedback, | |
| QueryRequest, | |
| RepairResult, | |
| ValidationResult, | |
| ) | |
| from .prompts import build_repair_prompt | |
| from .utils import short_hash | |
| logger = logging.getLogger(__name__) | |
def summarize_diff(old_query: str, new_query: str) -> str:
    """Produce a short, human-readable summary of line-level differences.

    Compares the two queries position by position (after stripping each
    line) and reports modified, removed, and added lines.

    Args:
        old_query: SPARQL query before repair.
        new_query: SPARQL query after repair.

    Returns:
        Diff summary string.
    """
    before = old_query.strip().splitlines()
    after = new_query.strip().splitlines()
    if before == after:
        return "No changes."

    deltas: list[str] = []
    for idx in range(max(len(before), len(after))):
        left = before[idx].strip() if idx < len(before) else ""
        right = after[idx].strip() if idx < len(after) else ""
        if left == right:
            continue
        if left and right:
            deltas.append(f" Line {idx+1}: '{left}' → '{right}'")
        elif left:
            deltas.append(f" Line {idx+1} removed: '{left}'")
        else:
            deltas.append(f" Line {idx+1} added: '{right}'")

    # Lines differed only by surrounding whitespace.
    if not deltas:
        return "Whitespace-only changes."

    # Cap the listing at ten entries so the summary stays compact.
    summary = f"{len(deltas)} line(s) changed:\n" + "\n".join(deltas[:10])
    if len(deltas) > 10:
        summary += f"\n ... and {len(deltas) - 10} more changes"
    return summary
def _collect_evidence(
    decision: CoordinatorDecision,
    feedbacks: list[ExpertFeedback],
    validation: ValidationResult,
) -> list[str]:
    """Gather the evidence strings relevant to the selected repair action.

    Combines the coordinator's rationale, evidence from non-OK experts,
    and validation findings into a single flat list.

    Args:
        decision: The coordinator decision with selected action.
        feedbacks: Expert feedback list.
        validation: Validation result.

    Returns:
        List of evidence strings.
    """
    # Start from the coordinator's own reasoning.
    items: list[str] = list(decision.rationale)

    # Experts that signed off contribute nothing; the rest contribute
    # their raw evidence plus a one-line issue summary, when present.
    for feedback in feedbacks:
        if feedback.verdict == "ok":
            continue
        items.extend(feedback.evidence)
        if feedback.issue_summary:
            items.append(f"Expert {feedback.expert_name}: {feedback.issue_summary}")

    # Fold in whatever the validator observed.
    if validation.execution_error:
        items.append(f"Execution error: {validation.execution_error}")
    if validation.suspicious_flags:
        items.append(f"Suspicious flags: {', '.join(validation.suspicious_flags)}")
    if validation.result_count is not None:
        items.append(f"Result count: {validation.result_count}")
    return items
def repair_candidate(
    request: QueryRequest,
    candidate: CandidateQuery,
    validation: ValidationResult,
    decision: CoordinatorDecision,
    feedbacks: list[ExpertFeedback],
    context: ContextPackage,
    repair_iteration: int,
    runtime: RuntimeConfig,
    llm: LLMClient,
    prior_queries: list[str] | None = None,
) -> RepairResult:
    """Perform a targeted repair on a candidate query.

    Rules:
    - Repair prompt instructs model to make the smallest possible edit
    - Repair prompt includes only the selected action and relevant evidence
    - Output must be SPARQL only
    - If output is identical to input after normalization, changed = False

    Args:
        request: The query request.
        candidate: Current candidate to repair.
        validation: Validation result for this candidate.
        decision: Coordinator decision with selected action.
        feedbacks: Expert feedback list.
        context: Context package.
        repair_iteration: Current repair iteration (0-indexed).
        runtime: Runtime configuration.
        llm: LLM client for repair.
        prior_queries: Earlier queries to include in the prompt, if any.

    Returns:
        RepairResult with the repaired candidate.
    """
    # Fall back to a default action when the coordinator did not pick one.
    action = decision.selected_action or "entity_relink"
    evidence = _collect_evidence(decision, feedbacks, validation)

    # Syntax fixes use a stricter template: they must not rewrite
    # semantically meaningful parts of the query.
    template_key = "syntax_repair" if action == "syntax_fix" else "repair"

    prompt = build_repair_prompt(
        request.question,
        candidate.query,
        action,
        evidence,
        context,
        prior_queries=prior_queries,
        prompt_files=runtime.prompt_files,
        template_key=template_key,
    )
    raw_output = llm.generate_text(prompt)

    # Prefer a parsed code block; otherwise fall back to the raw reply
    # with any stray markdown fences stripped off.
    parsed = parse_generation_output(raw_output)
    if parsed:
        new_query = parsed[0]
    else:
        new_query = raw_output.strip()
        new_query = re.sub(r"^```\w*\s*", "", new_query)
        new_query = re.sub(r"\s*```$", "", new_query)

    # A repair only counts as a change when the normalized forms differ.
    changed = normalize_query(candidate.query) != normalize_query(new_query)
    diff = summarize_diff(candidate.query, new_query)

    new_candidate = CandidateQuery(
        candidate_id=f"repair_{repair_iteration}_{short_hash(new_query)}",
        query=new_query,
        source="repair",
        generation_index=candidate.generation_index,
        parent_candidate_id=candidate.candidate_id,
        repair_iteration=repair_iteration + 1,
    )
    logger.info(
        "Repair applied (action=%s, changed=%s): %s → %s",
        action, changed, candidate.candidate_id, new_candidate.candidate_id,
    )
    return RepairResult(
        old_candidate_id=candidate.candidate_id,
        new_candidate=new_candidate,
        action_used=action,
        changed=changed,
        diff_summary=diff,
    )