# iris-at-text2sparql / src/repair.py
# Author: Alex Latipov
# Commit d745844: Harden frozen eval prompts and judge JSON handling
"""Repair module for the Text2SPARQL repair pipeline.
Performs targeted repair on a SPARQL candidate using a single
repair action with LLM assistance. Makes the smallest possible edit.
"""
from __future__ import annotations

import difflib
import logging
import re

from .config import RuntimeConfig
from .generation import normalize_query, parse_generation_output
from .llm import LLMClient
from .models import (
    CandidateQuery,
    ContextPackage,
    CoordinatorDecision,
    ExpertFeedback,
    QueryRequest,
    RepairResult,
    ValidationResult,
)
from .prompts import build_repair_prompt
from .utils import short_hash
logger = logging.getLogger(__name__)
def summarize_diff(old_query: str, new_query: str) -> str:
    """Summarize the difference between old and new queries.

    Aligns the two queries with difflib so that a single inserted or
    removed line does not cascade into every subsequent line being
    reported as changed (which a naive positional comparison would do).

    Args:
        old_query: Original SPARQL query.
        new_query: Repaired SPARQL query.

    Returns:
        Human-readable diff summary string.
    """
    old_lines = old_query.strip().splitlines()
    new_lines = new_query.strip().splitlines()
    if old_lines == new_lines:
        return "No changes."
    # Compare with per-line leading/trailing whitespace ignored.
    old_stripped = [line.strip() for line in old_lines]
    new_stripped = [line.strip() for line in new_lines]
    if old_stripped == new_stripped:
        return "Whitespace-only changes."
    changes: list[str] = []
    matcher = difflib.SequenceMatcher(a=old_stripped, b=new_stripped, autojunk=False)
    for tag, i1, i2, j1, j2 in matcher.get_opcodes():
        if tag == "equal":
            continue
        if tag == "replace":
            # Pair replaced lines positionally; leftovers become adds/removes.
            for offset in range(max(i2 - i1, j2 - j1)):
                old_line = old_stripped[i1 + offset] if i1 + offset < i2 else ""
                new_line = new_stripped[j1 + offset] if j1 + offset < j2 else ""
                if old_line and new_line:
                    changes.append(f" Line {i1 + offset + 1}: '{old_line}' → '{new_line}'")
                elif old_line:
                    changes.append(f" Line {i1 + offset + 1} removed: '{old_line}'")
                else:
                    changes.append(f" Line {j1 + offset + 1} added: '{new_line}'")
        elif tag == "delete":
            for i in range(i1, i2):
                changes.append(f" Line {i + 1} removed: '{old_stripped[i]}'")
        else:  # insert
            for j in range(j1, j2):
                changes.append(f" Line {j + 1} added: '{new_stripped[j]}'")
    if not changes:
        # Defensive: should be unreachable given the equality checks above.
        return "Whitespace-only changes."
    summary = f"{len(changes)} line(s) changed:\n" + "\n".join(changes[:10])
    if len(changes) > 10:
        summary += f"\n ... and {len(changes) - 10} more changes"
    return summary
def _collect_evidence(
    decision: CoordinatorDecision,
    feedbacks: list[ExpertFeedback],
    validation: ValidationResult,
) -> list[str]:
    """Gather the evidence strings relevant to the selected repair action.

    Combines the coordinator's rationale, evidence from every expert that
    did not report "ok", and observations from the validation result.

    Args:
        decision: The coordinator decision with selected action.
        feedbacks: Expert feedback list.
        validation: Validation result.

    Returns:
        List of evidence strings.
    """
    # Start with the coordinator's own rationale.
    collected: list[str] = list(decision.rationale)
    # Only experts that flagged a problem contribute evidence.
    for feedback in feedbacks:
        if feedback.verdict == "ok":
            continue
        collected += feedback.evidence
        if feedback.issue_summary:
            collected.append(f"Expert {feedback.expert_name}: {feedback.issue_summary}")
    # Observations from executing/validating the candidate.
    if validation.execution_error:
        collected.append(f"Execution error: {validation.execution_error}")
    if validation.suspicious_flags:
        collected.append(f"Suspicious flags: {', '.join(validation.suspicious_flags)}")
    if validation.result_count is not None:
        collected.append(f"Result count: {validation.result_count}")
    return collected
def repair_candidate(
    request: QueryRequest,
    candidate: CandidateQuery,
    validation: ValidationResult,
    decision: CoordinatorDecision,
    feedbacks: list[ExpertFeedback],
    context: ContextPackage,
    repair_iteration: int,
    runtime: RuntimeConfig,
    llm: LLMClient,
    prior_queries: list[str] | None = None,
) -> RepairResult:
    """Perform a targeted repair on a candidate query.

    Rules:
    - Repair prompt instructs model to make the smallest possible edit
    - Repair prompt includes only the selected action and relevant evidence
    - Output must be SPARQL only
    - If output is identical to input after normalization, changed = False
    - If the model returns an empty output, the original query is kept
      (changed = False) instead of producing an empty candidate

    Args:
        request: The query request.
        candidate: Current candidate to repair.
        validation: Validation result for this candidate.
        decision: Coordinator decision with selected action.
        feedbacks: Expert feedback list.
        context: Context package.
        repair_iteration: Current repair iteration (0-indexed).
        runtime: Runtime configuration.
        llm: LLM client for repair.
        prior_queries: Previously attempted queries, passed to the prompt so
            the model can avoid repeating failed attempts.

    Returns:
        RepairResult with the repaired candidate.
    """
    # Fall back to a generic relink when the coordinator selected no action.
    action = decision.selected_action or "entity_relink"
    # Collect only the evidence relevant to the selected action
    evidence = _collect_evidence(decision, feedbacks, validation)
    # Syntax fixes have a stricter prompt because they should not rewrite
    # semantically meaningful parts of the query.
    template_key = "syntax_repair" if action == "syntax_fix" else "repair"
    # Build and execute repair prompt
    prompt = build_repair_prompt(
        request.question,
        candidate.query,
        action,
        evidence,
        context,
        prior_queries=prior_queries,
        prompt_files=runtime.prompt_files,
        template_key=template_key,
    )
    raw_output = llm.generate_text(prompt)
    # Extract SPARQL from output
    repaired_queries = parse_generation_output(raw_output)
    if repaired_queries:
        new_query = repaired_queries[0]
    else:
        # If no code block found, try using the raw output as SPARQL
        new_query = raw_output.strip()
        # Remove any markdown artifacts (stray ``` fences)
        new_query = re.sub(r"^```\w*\s*", "", new_query)
        new_query = re.sub(r"\s*```$", "", new_query)
    if not new_query.strip():
        # The model produced nothing usable; keep the original query rather
        # than propagating an empty candidate through the pipeline.
        logger.warning(
            "Repair produced empty output (action=%s); keeping original query", action,
        )
        new_query = candidate.query
    # Check if query actually changed (after normalization, so formatting-only
    # edits do not count as a change)
    old_norm = normalize_query(candidate.query)
    new_norm = normalize_query(new_query)
    changed = old_norm != new_norm
    # Generate diff summary
    diff = summarize_diff(candidate.query, new_query)
    # Create new candidate linked to its parent for lineage tracking
    new_candidate_id = f"repair_{repair_iteration}_{short_hash(new_query)}"
    new_candidate = CandidateQuery(
        candidate_id=new_candidate_id,
        query=new_query,
        source="repair",
        generation_index=candidate.generation_index,
        parent_candidate_id=candidate.candidate_id,
        repair_iteration=repair_iteration + 1,
    )
    logger.info(
        "Repair applied (action=%s, changed=%s): %s → %s",
        action, changed, candidate.candidate_id, new_candidate_id,
    )
    return RepairResult(
        old_candidate_id=candidate.candidate_id,
        new_candidate=new_candidate,
        action_used=action,
        changed=changed,
        diff_summary=diff,
    )