# detection_base/utils/relevance.py
# Author: Zhen Ye
# Commit bb6e650 — feat(backend): enhance inference pipeline with GLM logic
# and structured outputs
"""
Object relevance evaluation — deterministic gate between detection and GPT assessment.
Public functions:
evaluate_relevance(detection, criteria) -> RelevanceDecision (deterministic)
evaluate_relevance_llm(detected_labels, mission_text) -> set[str] (LLM post-filter)
INVARIANT INV-13 enforcement: evaluate_relevance() accepts RelevanceCriteria, NOT
MissionSpecification. It cannot see context_phrases, stripped_modifiers, or any
LLM-derived field. This is structural, not by convention.
"""
import json
import logging
from typing import Any, Dict, List, NamedTuple, Set
from utils.openai_client import chat_completion, extract_content, get_api_key, OpenAIAPIError
from coco_classes import canonicalize_coco_name
from utils.schemas import RelevanceCriteria
logger = logging.getLogger(__name__)
# Outcome of the deterministic relevance gate: a boolean verdict plus a
# machine-readable reason code.
# reason is one of: "ok" | "label_not_in_required_classes" | "below_confidence"
RelevanceDecision = NamedTuple(
    "RelevanceDecision",
    [("relevant", bool), ("reason", str)],
)
def evaluate_relevance(
    detection: Dict[str, Any],
    criteria: RelevanceCriteria,
) -> RelevanceDecision:
    """Evaluate whether a detection is relevant to the mission.

    Pure deterministic predicate — no LLM involvement (INV-13: accepts only
    RelevanceCriteria, so it structurally cannot see LLM-derived fields).

    Args:
        detection: Detection dict with at least 'label' and 'score' keys.
        criteria: RelevanceCriteria with required_classes and min_confidence.

    Returns:
        RelevanceDecision(relevant=bool, reason=str), where reason is one of
        "ok", "label_not_in_required_classes", "below_confidence".
    """
    label = (detection.get("label") or "").lower().strip()
    # `or 0.0` also covers an explicit `score: None`, which would otherwise
    # raise TypeError in the `<` comparison below.
    confidence = detection.get("score") or 0.0
    if not label:
        return RelevanceDecision(False, "label_not_in_required_classes")
    if not _label_in_required(label, criteria.required_classes):
        return RelevanceDecision(False, "label_not_in_required_classes")
    # Class matching and confidence gating are independent, so the confidence
    # check is applied once here instead of being duplicated per match branch.
    if confidence < criteria.min_confidence:
        return RelevanceDecision(False, "below_confidence")
    return RelevanceDecision(True, "ok")


def _label_in_required(label: str, required_classes: List[str]) -> bool:
    """Return True if `label` (lowercased) names a required class, either
    directly or via COCO canonicalization in either direction."""
    required_lower = {c.lower() for c in required_classes}
    # Direct match.
    if label in required_lower:
        return True
    # Synonym match: the detected label canonicalizes to a required class.
    canonical = canonicalize_coco_name(label)
    if not canonical:
        return False
    canonical_lower = canonical.lower()
    if canonical_lower in required_lower:
        return True
    # Reverse synonym match: a required class canonicalizes to the same COCO
    # class as the detected label.
    for req in required_classes:
        req_canonical = canonicalize_coco_name(req)
        if req_canonical and req_canonical.lower() == canonical_lower:
            return True
    return False
def evaluate_relevance_llm(
    detected_labels: List[str],
    mission_text: str,
) -> Set[str]:
    """Ask GPT which detected labels are relevant to the mission.

    Called ONCE on frame 0 with the unique labels found by the detector.

    Args:
        detected_labels: Unique class label strings from the detector.
        mission_text: Natural-language mission description.

    Returns:
        Set of relevant label strings (lowercased). On API failure or a
        malformed response, falls back to accepting all labels (fail-open,
        logged).
    """
    if not detected_labels:
        return set()
    if not get_api_key():
        logger.warning(
            "OPENAI_API_KEY not set — LLM relevance filter falling back to accept-all"
        )
        return set(detected_labels)
    prompt = (
        f"Given this mission: \"{mission_text}\"\n\n"
        f"Which of these detected object classes are relevant to the mission?\n"
        f"{json.dumps(detected_labels)}\n\n"
        "Return JSON: {\"relevant_labels\": [...]}\n"
        "Only include labels from the provided list that are relevant to "
        "accomplishing the mission. Be inclusive — if in doubt, include it."
    )
    payload = {
        "model": "gpt-4o-mini",
        "temperature": 0.0,
        "max_tokens": 200,
        "response_format": {"type": "json_object"},
        "messages": [
            {"role": "system", "content": "You are a mission relevance filter. Return only JSON."},
            {"role": "user", "content": prompt},
        ],
    }
    try:
        resp_data = chat_completion(payload)
        content, _refusal = extract_content(resp_data)
        if not content:
            logger.warning("GPT returned empty content for relevance filter — accept-all")
            return set(detected_labels)
        result = json.loads(content)
        relevant = result.get("relevant_labels", detected_labels)
        # Guard against a malformed payload: a bare string here would be
        # silently iterated character-by-character, and non-string items
        # would raise an uncaught AttributeError on .lower(). Raising
        # TypeError routes both cases into the accept-all fallback below.
        if not isinstance(relevant, list) or not all(
            isinstance(item, str) for item in relevant
        ):
            raise TypeError("relevant_labels is not a list of strings")
        relevant_set = {item.lower() for item in relevant}
        logger.info(
            "LLM relevance filter: mission=%r detected=%s relevant=%s",
            mission_text, detected_labels, relevant_set,
        )
        return relevant_set
    except OpenAIAPIError as e:
        logger.warning("LLM relevance API call failed: %s — accept-all fallback", e)
        return set(detected_labels)
    except (json.JSONDecodeError, KeyError, TypeError) as e:
        logger.warning("LLM relevance response parse failed: %s — accept-all fallback", e)
        return set(detected_labels)