iris-at-text2sparql / src /selection.py
Alex Latipov
Harden frozen eval prompts and judge JSON handling
d745844
"""Candidate selection for the Text2SPARQL repair pipeline.
Selects exactly one best candidate using purely symbolic scoring.
No LLM tie-breaker in v1.
"""
from __future__ import annotations
import logging
from .models import CandidateQuery, ValidationResult
logger = logging.getLogger(__name__)
def rank_candidates(
    candidates: list[CandidateQuery],
    validations: list[ValidationResult],
) -> list[tuple[str, float]]:
    """Rank candidates by validation score with deterministic tie-breaking.

    Ranking rules (in order):
        1. Higher validation score
        2. Fewer suspicious flags
        3. Fewer query characters (shorter queries preferred)
        4. Lower generation index

    Candidates are paired with their validation result through
    ``candidate_id``; the relative order of the two lists does not matter.
    A candidate without a matching validation result is reported at warning
    level and left out of the ranking.

    Args:
        candidates: List of candidate queries.
        validations: List of validation results, matched to candidates by
            ``candidate_id``. If several results share an id, the last one
            in the list is used.

    Returns:
        List of (candidate_id, score) tuples, sorted best-first. May be
        shorter than ``candidates`` (or empty) when validations are missing.
    """
    # Index validation results by candidate id for O(1) lookup per candidate.
    val_map = {v.candidate_id: v for v in validations}

    # Each row: (candidate_id, score, flag count, query length, generation index)
    ranked: list[tuple[str, float, int, int, int]] = []
    for cand in candidates:
        val = val_map.get(cand.candidate_id)
        if val is None:
            logger.warning(
                "No validation result for candidate %s", cand.candidate_id
            )
            continue
        ranked.append((
            cand.candidate_id,
            val.score,
            len(val.suspicious_flags),
            len(cand.query),
            cand.generation_index,
        ))

    # Sort: score DESC (negated), then flags ASC, then length ASC, then
    # index ASC. list.sort is stable, so rows that tie on every criterion
    # keep their input order.
    ranked.sort(key=lambda row: (-row[1], row[2], row[3], row[4]))

    # Callers only need the id and the score; drop the tie-break columns.
    return [(cid, score) for cid, score, *_ in ranked]
def select_best_candidate(
    candidates: list[CandidateQuery],
    validations: list[ValidationResult],
) -> CandidateQuery:
    """Pick the top-ranked candidate for deeper inspection.

    The candidates are ordered with ``rank_candidates`` and the first entry
    of that ordering is returned. When the ordering comes back empty, the
    first element of ``candidates`` is returned instead and a warning is
    logged.

    Args:
        candidates: List of candidate queries.
        validations: List of validation results.

    Returns:
        The best CandidateQuery.

    Raises:
        ValueError: If no candidates are available.
    """
    if not candidates:
        raise ValueError("No candidates to select from")

    ordered = rank_candidates(candidates, validations)

    if ordered:
        winner_id, winner_score = ordered[0]

        # Resolve the winning id back to its candidate object.
        by_id = {entry.candidate_id: entry for entry in candidates}
        winner = by_id[winner_id]

        logger.info(
            "Selected candidate %s with score %.2f (out of %d candidates)",
            winner_id, winner_score, len(candidates),
        )

        # Report the second-best entry too, when there is one.
        if len(ordered) > 1:
            second_id, second_score = ordered[1]
            logger.debug(
                "Runner-up: %s with score %.2f",
                second_id, second_score,
            )
        return winner

    # Nothing could be ranked: fall back to the first candidate as given.
    logger.warning("Ranking produced no results — returning first candidate")
    return candidates[0]