Spaces:

InsanAlex
/

iris-at-text2sparql

Running on CPU Upgrade

Alex Latipov

Harden frozen eval prompts and judge JSON handling

d745844 7 days ago

2.84 kB

	"""Candidate selection for the Text2SPARQL repair pipeline.

	Selects exactly one best candidate using purely symbolic scoring.
	No LLM tie-breaker in v1.
	"""

	from __future__ import annotations

	import logging

	from .models import CandidateQuery, ValidationResult

	logger = logging.getLogger(__name__)


	def rank_candidates(
	    candidates: list[CandidateQuery],
	    validations: list[ValidationResult],
	) -> list[tuple[str, float]]:
	    """Rank candidates by validation score with deterministic tie-breaking.

	    Ranking rules (applied in order):
	    1. Higher validation score
	    2. Fewer suspicious flags
	    3. Fewer query characters (shorter queries preferred)
	    4. Lower generation index

	    Args:
	        candidates: Candidate queries to rank.
	        validations: Validation results. Each one is paired with its
	            candidate through ``candidate_id`` (not by list position); if
	            several results share an id, the last one in the list is used.

	    Returns:
	        List of (candidate_id, score) tuples, sorted best-first. Candidates
	        that have no validation result are logged and left out.
	    """
	    validation_by_id = {v.candidate_id: v for v in validations}

	    # Each entry is (sort_key, candidate_id, score); the key is kept apart
	    # from the payload so the ordering rules are spelled out in one place.
	    scored: list[tuple[tuple[float, int, int, int], str, float]] = []
	    for cand in candidates:
	        val = validation_by_id.get(cand.candidate_id)
	        if val is None:
	            logger.warning(
	                "No validation result for candidate %s", cand.candidate_id
	            )
	            continue
	        sort_key = (
	            -val.score,  # negated so that a higher score sorts first
	            len(val.suspicious_flags),
	            len(cand.query),
	            cand.generation_index,
	        )
	        scored.append((sort_key, cand.candidate_id, val.score))

	    # list.sort is stable, so fully tied candidates keep their input order.
	    scored.sort(key=lambda entry: entry[0])

	    return [(cid, score) for _, cid, score in scored]


	def select_best_candidate(
	    candidates: list[CandidateQuery],
	    validations: list[ValidationResult],
	) -> CandidateQuery:
	    """Pick the top-ranked candidate according to ``rank_candidates``.

	    Args:
	        candidates: Candidate queries to choose from.
	        validations: Validation results passed through to the ranking.

	    Returns:
	        The candidate whose id is first in the ranking. If the ranking is
	        empty, the first element of ``candidates`` is returned instead.

	    Raises:
	        ValueError: If ``candidates`` is empty.
	    """
	    if not candidates:
	        raise ValueError("No candidates to select from")

	    ordered = rank_candidates(candidates, validations)
	    if not ordered:
	        # Nothing could be ranked; fall back to the first candidate given.
	        logger.warning("Ranking produced no results — returning first candidate")
	        return candidates[0]

	    top_id, top_score = ordered[0]

	    # Later candidates overwrite earlier ones that share the same id.
	    by_id = {}
	    for candidate in candidates:
	        by_id[candidate.candidate_id] = candidate
	    winner = by_id[top_id]

	    logger.info(
	        "Selected candidate %s with score %.2f (out of %d candidates)",
	        top_id,
	        top_score,
	        len(candidates),
	    )

	    if len(ordered) > 1:
	        second_id, second_score = ordered[1]
	        logger.debug("Runner-up: %s with score %.2f", second_id, second_score)

	    return winner