"""
Object relevance evaluation — deterministic gate between detection and GPT assessment.

Public functions:
    evaluate_relevance(detection, criteria) -> RelevanceDecision      (deterministic)
    evaluate_relevance_llm(detected_labels, mission_text) -> set[str] (LLM post-filter)

INVARIANT INV-13 enforcement:
    evaluate_relevance() accepts RelevanceCriteria, NOT MissionSpecification.
    It cannot see context_phrases, stripped_modifiers, or any LLM-derived field.
    This is structural, not by convention.
"""

import json
import logging
from typing import Any, Dict, List, NamedTuple, Set

from utils.openai_client import chat_completion, extract_content, get_api_key, OpenAIAPIError
from coco_classes import canonicalize_coco_name
from utils.schemas import RelevanceCriteria

logger = logging.getLogger(__name__)


class RelevanceDecision(NamedTuple):
    """Outcome of the deterministic relevance gate for one detection."""

    relevant: bool
    reason: str  # "ok" | "label_not_in_required_classes" | "below_confidence"


def _label_matches_required(label: str, required_classes) -> bool:
    """Return True if *label* (already lowercased) names one of *required_classes*.

    Matching is attempted in three steps, cheapest first:
      1. case-insensitive direct match,
      2. the label's canonical COCO name is itself a required class,
      3. some required class canonicalizes to the same COCO name as the label.
    """
    required_lower = {c.lower() for c in required_classes}
    if label in required_lower:
        return True

    canonical = canonicalize_coco_name(label)
    if not canonical:
        # Steps 2 and 3 both need a canonical name for the label.
        return False

    canonical_lower = canonical.lower()
    if canonical_lower in required_lower:
        return True

    for req in required_classes:
        req_canonical = canonicalize_coco_name(req)
        if req_canonical and req_canonical.lower() == canonical_lower:
            return True
    return False


def evaluate_relevance(
    detection: Dict[str, Any],
    criteria: RelevanceCriteria,
) -> RelevanceDecision:
    """Evaluate whether a detection is relevant to the mission.

    Pure deterministic predicate — no LLM involvement.

    Args:
        detection: Detection dict with at least 'label' and 'score' keys.
            A missing, empty or non-string label is treated as "no label";
            a missing or ``None`` score is treated as 0.0.
        criteria: RelevanceCriteria with required_classes and min_confidence.

    Returns:
        RelevanceDecision(relevant=bool, reason=str). The label check takes
        precedence: an unmatched label always yields
        "label_not_in_required_classes" regardless of its score.
    """
    raw_label = detection.get("label")
    # Guard against non-string labels instead of crashing on .lower().
    label = raw_label.lower().strip() if isinstance(raw_label, str) else ""

    if not label or not _label_matches_required(label, criteria.required_classes):
        return RelevanceDecision(False, "label_not_in_required_classes")

    # dict.get's default only covers a *missing* key; an explicit None score
    # would otherwise raise TypeError in the comparison below.
    score = detection.get("score")
    confidence = 0.0 if score is None else score

    if confidence < criteria.min_confidence:
        return RelevanceDecision(False, "below_confidence")
    return RelevanceDecision(True, "ok")


def _accept_all(detected_labels: List[str]) -> Set[str]:
    """Fail-open result: every detected label, lowercased like the success path."""
    return {label.lower() for label in detected_labels}


def _parse_relevant_labels(content: str, detected_labels: List[str]) -> Set[str]:
    """Parse the model's JSON reply into a lowercased label set.

    Raises:
        json.JSONDecodeError: *content* is not valid JSON.
        TypeError: the JSON is not an object, or "relevant_labels" is not a
            list of strings.
    """
    result = json.loads(content)
    if not isinstance(result, dict):
        raise TypeError(f"expected a JSON object, got {type(result).__name__}")

    if "relevant_labels" not in result:
        # Same outcome as before: a reply without the key accepts everything.
        return _accept_all(detected_labels)

    relevant = result["relevant_labels"]
    # A bare string would otherwise be iterated character by character.
    if not isinstance(relevant, list) or not all(isinstance(x, str) for x in relevant):
        raise TypeError("relevant_labels must be a list of strings")
    return {label.lower() for label in relevant}


def evaluate_relevance_llm(
    detected_labels: List[str],
    mission_text: str,
) -> Set[str]:
    """Ask GPT which detected labels are relevant to the mission.

    Called ONCE on frame 0 with the unique labels found by the detector.

    Args:
        detected_labels: Unique label strings produced by the detector.
        mission_text: Free-text mission description inserted into the prompt.

    Returns:
        A set of relevant label strings, always lowercased. Empty input
        returns an empty set. On a missing API key, API failure, empty
        content or a malformed reply, falls back to accepting all labels
        (fail-open, logged), also lowercased.
    """
    if not detected_labels:
        return set()

    if not get_api_key():
        logger.warning(
            "OPENAI_API_KEY not set — LLM relevance filter falling back to accept-all"
        )
        return _accept_all(detected_labels)

    prompt = (
        f"Given this mission: \"{mission_text}\"\n\n"
        f"Which of these detected object classes are relevant to the mission?\n"
        f"{json.dumps(detected_labels)}\n\n"
        "Return JSON: {\"relevant_labels\": [...]}\n"
        "Only include labels from the provided list that are relevant to "
        "accomplishing the mission. Be inclusive — if in doubt, include it."
    )

    payload = {
        "model": "gpt-4o-mini",
        "temperature": 0.0,
        "max_tokens": 200,
        "response_format": {"type": "json_object"},
        "messages": [
            {"role": "system", "content": "You are a mission relevance filter. Return only JSON."},
            {"role": "user", "content": prompt},
        ],
    }

    try:
        resp_data = chat_completion(payload)
        content, _refusal = extract_content(resp_data)
        if not content:
            logger.warning("GPT returned empty content for relevance filter — accept-all")
            return _accept_all(detected_labels)

        relevant_set = _parse_relevant_labels(content, detected_labels)

        logger.info(
            "LLM relevance filter: mission=%r detected=%s relevant=%s",
            mission_text, detected_labels, relevant_set,
        )
        return relevant_set

    except OpenAIAPIError as e:
        logger.warning("LLM relevance API call failed: %s — accept-all fallback", e)
        return _accept_all(detected_labels)
    except (json.JSONDecodeError, KeyError, TypeError) as e:
        # Malformed shapes are raised as TypeError by _parse_relevant_labels,
        # so they land here instead of escaping as AttributeError.
        logger.warning("LLM relevance response parse failed: %s — accept-all fallback", e)
        return _accept_all(detected_labels)