import time
import logging
import re
from typing import List, Dict, Any, Optional
from collections import deque
from dataclasses import dataclass, field, asdict

logger = logging.getLogger(__name__)


@dataclass
class ObservationEvent:
    """Unified schema for any structured detection or event."""
    timestamp: float
    type: str  # 'object', 'acoustic', 'caption', 'relation'
    data: Dict[str, Any]  # {label, color, count, bbox, confidence, ...}
    source: str  # 'git_base', 'yolo', 'wavcap', 'whisper'
    confidence: float = 1.0


@dataclass
class MissionRule:
    """Machine-readable mission definition."""
    domain: str  # 'object', 'audio' (the parser below also emits 'speech')
    target: str  # 'cat', 'person', 'gunshot', etc.
    condition: str = "exists"
    threshold: float = 0.4
    attributes: Dict[str, str] = field(default_factory=dict)  # {color: "black", size: "large"}
    min_count: int = 1  # "group of people" → min_count=3
    metadata: Dict[str, Any] = field(default_factory=dict)


# ── Deterministic Mission Parser ──

# Object vocabulary: maps common words/synonyms to canonical labels
OBJECT_VOCAB = {
    # People
    "person": "person", "people": "person", "man": "person", "woman": "person",
    "boy": "person", "girl": "person", "child": "person", "kid": "person",
    "human": "person", "individual": "person", "subject": "person",
    # Vehicles
    "car": "car", "vehicle": "car", "automobile": "car",
    "truck": "truck", "lorry": "truck",
    "bus": "bus",
    "motorcycle": "motorcycle", "motorbike": "motorcycle",
    "bike": "bicycle", "bicycle": "bicycle",
    "boat": "boat", "ship": "boat",
    "airplane": "airplane", "plane": "airplane", "aircraft": "airplane",
    # Animals
    "cat": "cat", "kitten": "cat", "feline": "cat",
    "dog": "dog", "puppy": "dog", "canine": "dog",
    "bird": "bird", "horse": "horse",
    "cow": "cow", "cattle": "cow",
    "sheep": "sheep", "elephant": "elephant", "bear": "bear",
    # Weapons / Threats
    "knife": "knife", "blade": "knife",
    "gun": "gun", "pistol": "gun", "rifle": "gun", "firearm": "gun",
    "weapon": "knife",  # broad catch
    "sword": "knife",
    # Objects
    "phone": "cell phone", "cellphone": "cell phone", "mobile": "cell phone",
    "laptop": "laptop", "computer": "laptop",
    "backpack": "backpack", "bag": "backpack",
    "umbrella": "umbrella", "bottle": "bottle", "chair": "chair",
    "table": "dining table", "desk": "dining table",
    "tv": "tv", "television": "tv", "monitor": "tv",
    "book": "book", "clock": "clock",
    "fire": "fire", "flame": "fire",
    "headphones": "headphones", "headphone": "headphones",
    "glasses": "glasses", "eyeglasses": "glasses",
    "shirt": "shirt", "tshirt": "shirt", "top": "shirt",
    "shoe": "shoe", "shoes": "shoe", "boot": "shoe",
    "pant": "pant", "pants": "pant", "jeans": "pant",
    "suit": "suit", "jacket": "suit", "coat": "suit",
}

# Audio event vocabulary
AUDIO_VOCAB = {
    "gunshot": "gunshot", "gunfire": "gunshot", "shooting": "gunshot",
    "explosion": "explosion", "blast": "explosion", "bang": "explosion",
    "scream": "scream", "screaming": "scream", "yelling": "scream",
    "siren": "siren", "alarm": "siren",
    "speech": "speech", "talking": "speech", "conversation": "speech",
    "music": "music", "singing": "music", "song": "music",
    "barking": "barking", "bark": "barking",
    "engine": "engine", "motor": "engine",
    "footsteps": "footsteps", "walking": "footsteps", "running": "footsteps",
    "glass_break": "glass_break", "shatter": "glass_break",
    "crying": "crying", "cry": "crying", "sobbing": "crying",
    "horn": "horn", "honking": "horn",
    "thunder": "thunder", "rain": "rain", "wind": "wind",
}

# Color vocabulary
COLOR_VOCAB = {
    "black", "white", "red", "blue", "green", "yellow", "orange", "purple",
    "pink", "brown", "gray", "grey", "silver", "gold", "dark", "light", "bright",
}

# Count vocabulary: maps words to minimum counts
COUNT_VOCAB = {
    "group": 3, "crowd": 5, "many": 3, "several": 3, "few": 2,
    "couple": 2, "pair": 2, "multiple": 2,
    "two": 2, "three": 3, "four": 4, "five": 5,
}

# Category words: treated as "Soft Context" (flexible/optional if a specific target is present)
CATEGORY_WORDS = {
    "color", "colors", "sound", "sounds", "noise", "noises",
    "object", "objects", "item", "items", "thing", "things",
    "activity", "event", "action", "sign", "signal",
    "sense", "perceive", "detect", "finding", "report"
}

# Speech/transcription vocabulary (triggers Whisper)
SPEECH_VOCAB = {
    "say": "transcribe", "said": "transcribe", "saying": "transcribe",
    "speak": "transcribe", "speaking": "transcribe", "spoken": "transcribe",
    "tell": "transcribe", "telling": "transcribe", "told": "transcribe",
    "transcribe": "transcribe", "transcript": "transcribe", "transcription": "transcribe",
    "voice": "transcribe", "voices": "transcribe",
    "words": "transcribe", "word": "transcribe",
    "language": "transcribe",
    "dialogue": "transcribe", "dialog": "transcribe",
    "quote": "transcribe", "mention": "transcribe",
    "shout": "transcribe", "whisper": "transcribe", "yell": "transcribe",
    "call": "transcribe", "called": "transcribe", "calling": "transcribe",
}

# ── Model Routing ──

# Maps rule domains to the models that should be activated
DOMAIN_MODEL_MAP = {
    "object": ["git_base", "yolo"],  # Visual object detection
    "audio": ["wavcap"],  # Audio event detection
    "speech": ["whisper"],  # Speech transcription
}

# Default models when no mission prompt is provided
DEFAULT_MODELS = {"git_base", "wavcap"}

# Words the last-resort fallback of the deterministic parser refuses to use as a target.
_FALLBACK_STOPWORDS = frozenset({
    "the", "for", "any", "sign", "find", "detect", "there",
    "with", "and", "are", "was", "has", "have",
})


def route_models(rules: List[MissionRule]) -> set:
    """
    Given parsed mission rules, return the set of models that need to run.

    No prompt (empty rules) → a copy of DEFAULT_MODELS.
    With prompt → the union of DOMAIN_MODEL_MAP entries for every rule domain;
    domains missing from the map contribute nothing.
    """
    if not rules:
        return DEFAULT_MODELS.copy()

    models = set()
    for rule in rules:
        models.update(DOMAIN_MODEL_MAP.get(rule.domain, []))

    logger.info(f"[ROUTER] Domains: {set(r.domain for r in rules)} → Models: {models}")
    return models


def parse_mission(user_prompt: str) -> List[MissionRule]:
    """
    Hybrid Mission Interpreter: Rule Engine → Semantic LLM Injection.

    Runs the deterministic keyword parser first, then asks
    `reasoning_engine.interpret_mission` for an intent dict and appends one
    extra rule when it names a target no keyword rule already covers.

    Args:
        user_prompt: Free-text mission description.

    Returns:
        The keyword rules, plus at most one rule tagged
        metadata={"source": "semantic_llm"}.
    """
    # 1. First attempt with keyword rules (fast & 100% reliable for known vocabulary)
    rules = _parse_mission_deterministic(user_prompt)

    # 2. Semantic interpretation, run even when keywords matched so that
    #    unknown targets can still be picked up.
    logger.info(f"[INTERPRETER] Performing semantic scan for '{user_prompt}'...")
    # Imported lazily, as in the original; the module is project-local.
    from reasoning_engine import reasoning_engine
    # `or {}` / `or []`: tolerate the interpreter returning None for the intent
    # or for any optional field instead of crashing on `.get` / iteration later.
    intent = reasoning_engine.interpret_mission(user_prompt) or {}
    target = intent.get("target")
    caps = intent.get("capabilities") or []
    attrs = intent.get("attributes") or {}

    # 3. Dynamic Rule Injection
    # If the LLM found a specific target that our keyword list missed, we inject it.
    if target and target.lower() not in ["none", "null"]:
        wanted = target.lower()
        already_covered = any(r.target.lower() == wanted for r in rules)
        if not already_covered:
            caps_text = str(caps).lower()
            domain = "object"
            if "audio" in caps_text:
                domain = "audio"
            if "speech" in caps_text:  # speech wins when both are reported
                domain = "speech"

            rules.append(MissionRule(
                domain=domain,
                target=target,
                attributes=attrs,
                threshold=0.35,  # Lower threshold for open-vocabulary LLM targets
                metadata={"source": "semantic_llm"}
            ))
            logger.info(f"[SEMANTIC INJECTION] Created dynamic rule for target: '{target}'")

    return rules


def _lookup_vocab(word: str, vocab: dict) -> Optional[str]:
    """Return the canonical label for `word`, also trying it without a plural 'es'/'s'."""
    if word in vocab:
        return vocab[word]
    # A 4-letter word can be the plural of a 3-letter vocabulary entry
    # ("cats", "dogs", "cars", "guns", "says"); the previous `> 4` guard skipped
    # all of them. The stem must itself be a vocabulary key, so short non-plurals
    # such as "this" or "has" still do not match.
    if len(word) > 3:
        for suffix in ("es", "s"):
            if word.endswith(suffix):
                stem = word[:-len(suffix)]
                if stem in vocab:
                    return vocab[stem]
    return None


def _parse_mission_deterministic(user_prompt: str) -> List[MissionRule]:
    """
    Deterministic keyword parser.
    Converts natural language into structured MissionRules.
    No LLM involved — pure Python logic.

    Args:
        user_prompt: Free-text mission description.

    Returns:
        One rule per distinct canonical object and audio label found, at most
        one 'speech' rule, or a single fallback 'object' rule when no
        vocabulary word matched. Empty list for an empty/blank prompt.
    """
    if not user_prompt or not user_prompt.strip():
        return []

    text = user_prompt.lower().strip()
    # Remove punctuation for cleaner matching
    clean = re.sub(r'[^\w\s]', ' ', text)
    words = clean.split()

    rules: List[MissionRule] = []
    found_objects = set()
    found_audio = set()

    # 1. Extract colors mentioned
    colors_found = [w for w in words if w in COLOR_VOCAB]

    # 2. Extract count/quantity: only the first quantity word is considered
    min_count = max(1, next((COUNT_VOCAB[w] for w in words if w in COUNT_VOCAB), 1))

    # 3. Extract object targets (one rule per canonical label)
    for word in words:
        canonical = _lookup_vocab(word, OBJECT_VOCAB)
        if canonical and canonical not in found_objects:
            found_objects.add(canonical)
            # Fresh dict per rule; the first color found is attached to every object
            attrs = {"color": colors_found[0]} if colors_found else {}
            rules.append(MissionRule(
                domain="object",
                target=canonical,
                attributes=attrs,
                # The quantity only applies to people
                min_count=min_count if canonical == "person" else 1,
                threshold=0.4,
            ))

    # 4. Extract audio targets
    for word in words:
        canonical = _lookup_vocab(word, AUDIO_VOCAB)
        if canonical and canonical not in found_audio:
            found_audio.add(canonical)
            rules.append(MissionRule(
                domain="audio",
                target=canonical,
                threshold=0.5,
            ))

    # 4b. Extract speech/transcription targets (a single rule is enough)
    if any(_lookup_vocab(word, SPEECH_VOCAB) for word in words):
        rules.append(MissionRule(
            domain="speech",
            target="transcribe",
            threshold=0.3,
        ))

    # 5. Fallback: if nothing matched, try bigrams (e.g., "cell phone").
    # NOTE(review): OBJECT_VOCAB currently has no two-word keys, so this only
    # takes effect once such keys are added.
    if not rules:
        for first, second in zip(words, words[1:]):
            bigram = f"{first} {second}"
            if bigram in OBJECT_VOCAB:
                canonical = OBJECT_VOCAB[bigram]
                if canonical not in found_objects:
                    found_objects.add(canonical)
                    rules.append(MissionRule(domain="object", target=canonical))

    # 6. Ultimate fallback: use the last noun-like word
    if not rules:
        for word in reversed(words):
            if len(word) > 2 and word not in _FALLBACK_STOPWORDS:
                rules.append(MissionRule(domain="object", target=word, threshold=0.3))
                break

    logger.info(f"[MISSION PARSER] '{user_prompt}' → {len(rules)} rules: {[(r.domain, r.target, r.attributes) for r in rules]}")
    return rules


class ObservationBuffer:
    """Rolling buffer for temporal reasoning across events."""

    def __init__(self, window_seconds: float = 10.0):
        self.window_seconds = window_seconds
        self.buffer: deque[ObservationEvent] = deque()

    def add(self, event: ObservationEvent):
        """Append `event`, then drop events that have aged out of the window."""
        self.buffer.append(event)
        self._prune()

    def _prune(self):
        """Pop events from the left while they are older than `window_seconds`."""
        if not self.buffer:
            return
        now = time.time()
        # Only the left-most event is inspected, so pruning stops at the first
        # event that is still inside the window.
        while self.buffer and (now - self.buffer[0].timestamp > self.window_seconds):
            self.buffer.popleft()

    def get_all(self) -> List[ObservationEvent]:
        """Return a list copy of the buffered events (no pruning happens here)."""
        return list(self.buffer)


class MissionEvaluator:
    """Deterministic logic engine for matching rules against observations."""

    def __init__(self):
        self.active_rules: List[MissionRule] = []
        # Pre-process the concept map to ensure bidirectional matching
        self.concept_mirror = self._mirror_concept_map(self.CONCEPT_MAP)

    def _mirror_concept_map(self, original_map: Dict[str, List[str]]) -> Dict[str, set]:
        """
        Hardens the CONCEPT_MAP by making it bidirectional and self-referential.
        If 'person' maps to 'human', then 'human' will now map to 'person'.

        Args:
            original_map: concept → list of synonyms.

        Returns:
            word → set of related words. For every concept, the concept and
            each of its synonyms receive the whole group (concept + synonyms).
        """
        mirrored: Dict[str, set] = {}

        for key, synonyms in original_map.items():
            group = set(synonyms)
            group.add(key)
            # Every word owns its own set. Storing the same set object under
            # several words made a later update for one word silently extend
            # unrelated entries, with a result that depended on map order.
            for word in group:
                mirrored.setdefault(word, set()).update(group)

        logger.info(f"[EVALUATOR] Semantic Mirroring complete. Expanded {len(original_map)} concepts into {len(mirrored)} bidirectional links.")
        return mirrored

    def set_rules(self, rules: List[MissionRule]):
        """Replace the active rule set."""
        self.active_rules = rules

    def evaluate(self, world_state: Any) -> Dict[str, Any]:
        """
        The Checkmate Engine: Performs a deterministic logical verification.
        Instead of scanning a fuzzy buffer, it checks the Mirror (WorldState)
        against the Blueprint (MissionRules).

        Args:
            world_state: Object whose `entities` attribute (if present) is the
                list of entity dicts to check.

        Returns:
            Dict with "satisfied", "alerts", "status_message", "mission_status"
            and "timestamp"; when rules are active also "score", the fraction
            of rules that produced an alert. The mission counts as satisfied
            only when every active rule matched.
        """
        if not self.active_rules:
            return {
                "satisfied": False,
                "alerts": [],
                "status_message": "No active mission.",
                "mission_status": "none",
                "timestamp": time.time()
            }

        # A missing or None `entities` attribute is treated as "nothing observed"
        entities = getattr(world_state, "entities", None) or []

        alerts = []
        for rule in self.active_rules:
            # Check logic for each rule against the entire Entity Mirror
            match = self._checkmate_rule(rule, entities, world_state)
            if match:
                alerts.append(match)

        total = len(self.active_rules)
        satisfied = len(alerts) >= total

        if satisfied:
            status_message = "→ SENTINEL CHECKMATE: Mission purpose fulfilled."
            mission_status = "achieved"
        else:
            status_message = f"Monitoring... {len(alerts)}/{total} requirements satisfied."
            mission_status = "ongoing"

        return {
            "satisfied": satisfied,
            "alerts": alerts,
            "status_message": status_message,
            "mission_status": mission_status,
            "timestamp": time.time(),
            "score": len(alerts) / total
        }

    def _checkmate_rule(self, rule: MissionRule, entities: List[Dict[str, Any]], world_state: Any) -> Optional[Dict[str, Any]]:
        """
        Performs the hard logical checkmate for a single mission rule.
        Checks BOTH standard entities AND specialist findings
        (entities whose "finding_type" is "specialist").

        Args:
            rule: The rule to verify.
            entities: Entity dicts; the keys read are "type", "attributes",
                "confidence" and "finding_type".
            world_state: Not used by this method.

        Returns:
            An alert dict for the first matching entity, or None.
        """
        target = rule.target.lower()
        # Loop-invariant pieces, computed once
        target_words = set(target.replace("_", " ").split())
        negatives = ["no ", "not ", "none ", "zero ", "missing", "unavailable", "0 "]

        for entity in entities:
            e_type = str(entity.get("type", "")).lower()
            e_attrs = entity.get("attributes") or {}
            e_conf = entity.get("confidence", 0.0)
            is_specialist = entity.get("finding_type") == "specialist"

            # ── Standard Entity Match ──
            if not is_specialist:
                # Guard: skip empty or single-character types
                if not e_type or len(e_type) < 2:
                    continue
                # Whole-word overlap instead of substring matching: "fire" does not
                # match "fireplace". Underscores separate words, so "fire" DOES share
                # a word with "fire_analyst".
                e_type_words = set(e_type.replace("_", " ").split())
                if not (target_words & e_type_words) and target != e_type:
                    continue
                if e_conf < rule.threshold:
                    continue

                # Attribute constraint check: every requested value must appear
                # (case-insensitively) inside the entity's value for that key.
                if rule.attributes:
                    attrs_ok = all(
                        str(attr_val).lower() in str(e_attrs.get(attr_key, "")).lower()
                        for attr_key, attr_val in rule.attributes.items()
                    )
                    if not attrs_ok:
                        continue

                return {
                    "alert": True,
                    "type": "checkmate_match",
                    "message": f"Verified target '{target}' in world state.",
                    "entity": entity,
                    "confidence": e_conf
                }

            # ── Specialist Finding Match ──
            # Check 1: Direct value match (e.g. color value = "brown", terrain = "forest").
            # The value must be non-empty: "" is a substring of every string, so a
            # finding without a value used to match every rule.
            attr_value = str(e_attrs.get("value", "")).lower()
            if attr_value and (target in attr_value or attr_value in target):
                return {
                    "alert": True,
                    "type": "specialist_match",
                    "message": f"Specialist confirmed: '{target}' detected ({attr_value}).",
                    "entity": entity,
                    "confidence": e_conf
                }

            # (The former "Check 2" repeated Check 1's condition and could never
            # fire after Check 1 had failed, so it was removed.)

            # Check 2: Explanation text contains target (broad fallback)
            explanation = str(e_attrs.get("explanation", "")).lower()
            if target in explanation:
                # HARDENED CHECK: Ensure it's not a negative report (e.g. "No human detected").
                # Only the 20 characters before the first occurrence are inspected.
                idx = explanation.find(target)
                snippet = explanation[max(0, idx - 20):idx]
                if not any(neg in snippet for neg in negatives):
                    return {
                        "alert": True,
                        "type": "specialist_match",
                        "message": f"Specialist report contains reference to '{target}'.",
                        "entity": entity,
                        "confidence": e_conf
                    }

        # Fallback: Audio domain check
        if rule.domain == 'audio':
            for entity in entities:
                if entity.get("type") == "acoustic_event":
                    e_label = str((entity.get("attributes") or {}).get("label", "")).lower()
                    if target in e_label:
                        return {"alert": True, "message": f"Acoustic confirmed: {target}", "entity": entity}

        return None

    # ── SEMANTIC CONCEPT MAP ──
    # Maps abstract mission concepts to concrete keywords that specialist models might report.
    # The MORE entries here, the smarter the Checkmate becomes at understanding natural language.
CONCEPT_MAP = { # ═══════════════════════════════════════════ # THREAT / DANGER / EMERGENCY # ═══════════════════════════════════════════ "danger": ["knife", "gun", "weapon", "fire", "flame", "blood", "fight", "alarm", "scream", "explosion", "threat", "attack", "violence", "aggressive", "sword", "pistol", "rifle", "crash", "collision", "smoke", "broken", "falling", "injured", "hazard", "threat"], "threat": ["knife", "gun", "weapon", "fire", "blood", "fight", "alarm", "scream", "explosion", "attack", "violence", "aggressive", "intruder", "suspicious", "trespassing", "masked", "hooded", "danger", "hostile"], "emergency": ["fire", "smoke", "alarm", "siren", "scream", "crash", "explosion", "injured", "blood", "ambulance", "fallen", "collapse", "unconscious", "drowning", "critical"], "accident": ["crash", "collision", "fall", "fallen", "broken", "blood", "damage", "wreck", "injury", "injured", "ambulance", "fire", "smoke", "shattered", "debris", "impact", "overturned", "dent"], "crime": ["knife", "gun", "weapon", "masked", "hooded", "stealing", "robbery", "intruder", "trespassing", "suspicious", "fight", "assault", "vandalism", "break-in", "threat"], "weapon": ["knife", "gun", "pistol", "rifle", "sword", "blade", "firearm", "machete", "bat", "club", "axe", "hammer"], "violence": ["fight", "fighting", "attack", "punch", "kick", "aggressive", "blood", "weapon", "assault", "struggle", "thrown", "hit", "slap"], "intrusion": ["intruder", "trespassing", "unauthorized", "stranger", "suspicious", "break-in", "forced entry", "masked", "hooded", "sneaking"], "suspicious": ["suspicious", "lurking", "hiding", "sneaking", "masked", "hooded", "loitering", "unusual", "strange", "watching"], "hazard": ["fire", "smoke", "chemical", "spill", "leak", "gas", "electrical", "wire", "flooding", "slippery", "obstacle", "debris"], "risk": ["knife", "gun", "fire", "fall", "height", "speed", "chemical", "explosion", "collision", "electrical", "drowning"], # 
═══════════════════════════════════════════ # HUMAN ACTIVITIES & ACTIONS # ═══════════════════════════════════════════ "activity": ["walking", "running", "sitting", "standing", "holding", "moving", "talking", "eating", "drinking", "working", "playing", "reading", "typing", "cooking", "cleaning", "dancing", "exercising", "sleeping", "lying", "writing", "person", "man", "woman", "human", "subject", "individual", "boy", "girl", "child", "people"], "movement": ["walking", "running", "moving", "jumping", "climbing", "crawling", "dancing", "jogging", "sprinting", "stepping", "marching", "pacing", "sliding"], "interaction": ["talking", "speaking", "shaking hands", "hugging", "fighting", "pointing", "waving", "greeting", "kissing", "arguing", "collaborating"], "working": ["typing", "writing", "computer", "laptop", "desk", "phone", "meeting", "office", "paperwork", "keyboard", "tool", "construction"], "eating": ["eating", "food", "drinking", "cup", "glass", "plate", "fork", "spoon", "restaurant", "kitchen", "cooking", "meal", "snack", "chewing"], "exercising": ["running", "jogging", "pushup", "jumping", "stretching", "yoga", "gym", "weights", "fitness", "workout", "training", "sport"], "sleeping": ["sleeping", "lying", "bed", "resting", "napping", "pillow", "blanket", "unconscious", "eyes closed", "still"], "cooking": ["cooking", "stove", "pan", "pot", "kitchen", "chopping", "stirring", "baking", "oven", "food", "ingredient"], "cleaning": ["cleaning", "sweeping", "mopping", "wiping", "washing", "scrubbing", "vacuum", "broom", "dust", "spray"], "reading": ["reading", "book", "newspaper", "magazine", "screen", "text", "letter", "document", "page"], "driving": ["driving", "steering", "car", "vehicle", "road", "wheel", "dashboard", "seat", "traffic"], "talking": ["talking", "speaking", "conversation", "voice", "phone", "call", "discussion", "chat", "dialogue"], "speech": ["speaking", "talking", "conversation", "voice", "dialogue", "said", "spoke", "shouted", "whispered"], 
"voice": ["speaking", "talking", "voice", "audio", "vocal", "speech", "shout", "whisper"], "waiting": ["standing", "sitting", "still", "idle", "waiting", "stationary", "motionless", "paused"], "running": ["running", "sprinting", "jogging", "moving fast", "chasing", "fleeing", "rushing"], "fighting": ["fighting", "punching", "kicking", "wrestling", "struggle", "attack", "hit", "aggressive", "violent"], # ═══════════════════════════════════════════ # EMOTIONS / BEHAVIOR # ═══════════════════════════════════════════ "happy": ["smiling", "laughing", "smile", "joyful", "cheerful", "celebrating", "clapping", "excited"], "sad": ["crying", "tears", "sobbing", "depressed", "frown", "head down", "mourning"], "angry": ["aggressive", "shouting", "yelling", "fist", "fighting", "threatening", "confrontation", "furious"], "scared": ["screaming", "running", "hiding", "trembling", "panicked", "frightened", "cowering"], "calm": ["sitting", "standing", "still", "relaxed", "peaceful", "quiet", "resting"], "confused": ["looking around", "scratching head", "lost", "disoriented", "wandering"], "celebration": ["clapping", "cheering", "dancing", "jumping", "waving", "party", "balloons", "cake", "confetti"], # ═══════════════════════════════════════════ # PEOPLE / IDENTITY # ═══════════════════════════════════════════ "human": ["human", "person", "man", "woman", "subject", "individual", "boy", "girl", "child", "people"], "person": ["person", "man", "woman", "human", "subject", "individual", "boy", "girl", "child", "people", "face", "standing", "walking", "sitting"], "man": ["man", "male", "guy", "boy", "gentleman", "person"], "woman": ["woman", "female", "lady", "girl", "person"], "child": ["child", "kid", "baby", "boy", "girl", "infant", "toddler", "young"], "crowd": ["people", "group", "crowd", "gathering", "multiple", "several", "audience", "assembly", "line", "queue"], "face": ["face", "facial", "biometric", "identity", "recognized", "detected face", "eyes", "mouth", "nose", 
"forehead"], "identity": ["face", "facial", "biometric", "recognized", "identified", "known", "unknown", "stranger", "authorized"], "stranger": ["unknown", "unrecognized", "unauthorized", "stranger", "unfamiliar", "not identified"], "uniform": ["uniform", "vest", "helmet", "badge", "safety gear", "hi-vis", "reflective", "hardhat"], "mask": ["mask", "masked", "face covering", "balaclava", "surgical mask", "respirator"], # ═══════════════════════════════════════════ # BODY LANGUAGE / POSE / GESTURE # ═══════════════════════════════════════════ "gesture": ["hand", "gesture", "pointing", "waving", "thumbs", "fist", "sign language", "beckoning", "raised hand", "peace sign", "ok sign"], "pose": ["standing", "sitting", "lying", "crouching", "walking", "running", "active", "kneeling", "bending", "leaning", "squatting"], "standing": ["standing", "upright", "erect", "vertical", "on feet", "stationary"], "sitting": ["sitting", "seated", "chair", "bench", "cross-legged", "slouching"], "lying": ["lying", "prone", "supine", "on ground", "fallen", "collapsed", "horizontal", "flat"], "kneeling": ["kneeling", "on knees", "crouching", "bent", "bowing"], "pointing": ["pointing", "directing", "indicating", "finger", "aimed", "showing"], "waving": ["waving", "hand up", "greeting", "flagging", "signaling"], "handshake": ["handshake", "shaking hands", "greeting", "agreement", "meeting"], # ═══════════════════════════════════════════ # ENVIRONMENT / LOCATION / SCENE # ═══════════════════════════════════════════ "indoor": ["room", "bathroom", "kitchen", "office", "bedroom", "hallway", "corridor", "building", "interior", "inside", "lobby", "warehouse", "garage", "basement", "attic"], "outdoor": ["street", "road", "field", "forest", "park", "garden", "sky", "mountain", "beach", "lake", "river", "highway", "parking", "yard", "sidewalk", "pathway"], "room": ["room", "wall", "floor", "ceiling", "door", "window", "furniture", "light", "interior", "indoor"], "bathroom": ["bathroom", "toilet", 
"sink", "shower", "mirror", "tile", "faucet", "bath"], "kitchen": ["kitchen", "stove", "oven", "refrigerator", "counter", "cabinet", "cooking", "pot", "pan", "sink"], "office": ["office", "desk", "computer", "monitor", "keyboard", "chair", "cubicle", "meeting room", "whiteboard"], "bedroom": ["bedroom", "bed", "pillow", "blanket", "nightstand", "closet", "wardrobe", "mattress"], "street": ["street", "road", "sidewalk", "pavement", "intersection", "crosswalk", "traffic", "lane", "highway"], "forest": ["forest", "tree", "trees", "woods", "woodland", "jungle", "vegetation", "leaf", "branch", "nature"], "beach": ["beach", "sand", "ocean", "sea", "wave", "shore", "coast", "surfing", "sunbathing"], "parking": ["parking", "car park", "garage", "lot", "vehicle", "parked", "space"], "construction": ["construction", "building site", "crane", "scaffold", "hard hat", "cement", "brick", "steel", "foundation"], "hospital": ["hospital", "medical", "nurse", "doctor", "patient", "bed", "stretcher", "ambulance", "IV", "bandage"], "school": ["school", "classroom", "student", "teacher", "desk", "board", "backpack", "book", "education"], "store": ["store", "shop", "retail", "shelf", "product", "checkout", "counter", "customer", "aisle", "mall"], "warehouse": ["warehouse", "storage", "boxes", "pallets", "shelving", "industrial", "loading dock", "forklift"], # ═══════════════════════════════════════════ # FIRE / SMOKE / THERMAL # ═══════════════════════════════════════════ "fire": ["fire", "flame", "smoke", "burning", "thermal", "heat", "blaze", "inferno", "ember", "ignite", "combustion", "wildfire"], "smoke": ["smoke", "smoking", "fumes", "haze", "smog", "fog", "mist", "steam", "vapor"], "explosion": ["explosion", "blast", "bomb", "detonation", "fireworks", "burst", "shockwave"], "hot": ["fire", "flame", "heat", "thermal", "burning", "hot", "warm", "boiling", "steam"], # ═══════════════════════════════════════════ # WEATHER / NATURE # ═══════════════════════════════════════════ 
"weather": ["rain", "snow", "wind", "storm", "cloud", "sunny", "fog", "lightning", "thunder", "hail", "drizzle"], "rain": ["rain", "raining", "wet", "puddle", "umbrella", "drizzle", "downpour", "storm"], "snow": ["snow", "snowing", "ice", "icy", "frost", "frozen", "cold", "blizzard", "snowflake", "slippery"], "wind": ["wind", "windy", "blowing", "gust", "breeze", "tornado", "hurricane", "storm"], "night": ["dark", "night", "dim", "low light", "moonlight", "shadow", "darkness", "nighttime"], "day": ["bright", "daylight", "sunny", "sunlight", "daytime", "clear", "morning", "afternoon"], "flood": ["flood", "flooding", "water", "submerged", "rising water", "overflow", "dam", "rain"], # ═══════════════════════════════════════════ # VEHICLES / TRANSPORT # ═══════════════════════════════════════════ "vehicle": ["car", "truck", "bus", "motorcycle", "bicycle", "van", "vehicle", "suv", "taxi", "ambulance"], "car": ["car", "sedan", "automobile", "vehicle", "driving", "parked", "suv", "hatchback"], "truck": ["truck", "lorry", "semi", "trailer", "freight", "hauling", "delivery"], "motorcycle": ["motorcycle", "motorbike", "scooter", "moped", "biker", "helmet", "two-wheeler"], "bicycle": ["bicycle", "bike", "cycling", "cyclist", "pedal", "wheel", "handlebar"], "bus": ["bus", "transit", "public transport", "shuttle", "coach", "school bus"], "train": ["train", "railway", "railroad", "locomotive", "subway", "metro", "tram", "platform", "tracks"], "airplane": ["airplane", "plane", "aircraft", "jet", "flying", "airport", "runway", "helicopter"], "boat": ["boat", "ship", "vessel", "yacht", "canoe", "kayak", "ferry", "cruise", "sailing"], "helicopter": ["helicopter", "chopper", "rotor", "hovering", "aerial", "helipad"], # ═══════════════════════════════════════════ # ANIMALS # ═══════════════════════════════════════════ "animal": ["dog", "cat", "bird", "horse", "animal", "cow", "sheep", "goat", "pig", "rabbit", "deer", "bear", "elephant", "lion", "tiger", "snake", "fish", "chicken", 
"duck", "monkey"], "dog": ["dog", "puppy", "canine", "barking", "bark", "retriever", "shepherd", "bulldog", "poodle"], "cat": ["cat", "kitten", "feline", "meowing", "purring", "tabby", "siamese"], "bird": ["bird", "flying", "wings", "feathers", "chirping", "eagle", "hawk", "pigeon", "parrot", "crow", "sparrow", "owl"], "horse": ["horse", "stallion", "mare", "pony", "galloping", "riding", "equine", "saddle"], "pet": ["dog", "cat", "pet", "hamster", "rabbit", "fish", "parrot", "turtle", "guinea pig"], "insect": ["insect", "bug", "ant", "bee", "spider", "fly", "mosquito", "butterfly", "moth", "beetle", "cockroach"], "wildlife": ["deer", "bear", "wolf", "fox", "eagle", "snake", "lion", "tiger", "elephant", "monkey", "wild"], # ═══════════════════════════════════════════ # OBJECTS / ITEMS # ═══════════════════════════════════════════ "phone": ["phone", "cell phone", "mobile", "smartphone", "device", "screen", "calling", "texting"], "computer": ["computer", "laptop", "monitor", "screen", "keyboard", "mouse", "desktop", "typing"], "furniture": ["chair", "table", "desk", "couch", "sofa", "shelf", "cabinet", "bed", "drawer", "bookcase"], "bag": ["bag", "backpack", "suitcase", "purse", "handbag", "luggage", "briefcase", "duffel"], "bottle": ["bottle", "water bottle", "container", "jar", "glass", "cup", "mug", "flask"], "book": ["book", "novel", "textbook", "notebook", "magazine", "newspaper", "journal", "document", "paper"], "umbrella": ["umbrella", "parasol", "rain cover", "canopy"], "clock": ["clock", "time", "watch", "timer", "alarm clock", "countdown"], "key": ["key", "keychain", "lock", "padlock", "unlock", "access"], "tool": ["tool", "hammer", "screwdriver", "wrench", "pliers", "drill", "saw", "equipment"], "ball": ["ball", "soccer", "football", "basketball", "tennis", "baseball", "golf", "volleyball"], "helmet": ["helmet", "hard hat", "safety helmet", "motorcycle helmet", "bike helmet", "head protection"], "glasses": ["glasses", "eyeglasses", "sunglasses", 
"spectacles", "goggles", "lens"], "hat": ["hat", "cap", "beanie", "hood", "visor", "turban", "headband", "headwear"], # ═══════════════════════════════════════════ # TEXT / SIGNAGE / OCR # ═══════════════════════════════════════════ "text": ["text", "sign", "letter", "word", "writing", "label", "ocr", "read", "printed", "typed", "handwritten", "inscription"], "sign": ["sign", "text", "label", "warning", "notice", "banner", "writing", "poster", "billboard", "placard", "signage"], "license": ["license plate", "number plate", "registration", "plate", "tag"], "label": ["label", "tag", "sticker", "price", "brand", "product name", "barcode"], "graffiti": ["graffiti", "spray paint", "vandalism", "art", "mural", "tags", "street art"], "color": ["color", "colors", "red", "blue", "green", "yellow", "white", "black", "orange", "purple", "pink", "brown", "gray", "grey", "silver"], "colors": ["color", "colors", "red", "blue", "green", "yellow", "white", "black", "orange", "purple", "pink", "brown", "gray", "grey", "silver"], # ═══════════════════════════════════════════ # COLORS (Direct + Synonyms + "-ish" variants) # ═══════════════════════════════════════════ "red": ["red", "crimson", "scarlet", "ruby", "maroon", "burgundy", "vermillion", "reddish"], "reddish": ["red", "reddish", "crimson", "scarlet", "ruby", "maroon"], "blue": ["blue", "navy", "azure", "cobalt", "cyan", "teal", "sapphire", "indigo", "bluish"], "bluish": ["blue", "bluish", "navy", "azure", "cobalt", "cyan", "teal"], "green": ["green", "lime", "emerald", "olive", "sage", "mint", "forest green", "jade", "greenish"], "greenish": ["green", "greenish", "lime", "emerald", "olive"], "yellow": ["yellow", "gold", "golden", "amber", "lemon", "mustard", "canary", "yellowish"], "yellowish": ["yellow", "yellowish", "gold", "golden", "amber"], "black": ["black", "dark", "ebony", "onyx", "charcoal", "jet black", "blackish"], "blackish": ["black", "blackish", "dark", "ebony", "charcoal"], "white": ["white", "ivory", "cream", 
"snow", "pearl", "bright white", "whitish"], "whitish": ["white", "whitish", "ivory", "cream"], "orange": ["orange", "tangerine", "coral", "peach", "amber", "rust", "orangish"], "orangish": ["orange", "orangish", "tangerine", "coral"], "purple": ["purple", "violet", "lavender", "plum", "magenta", "mauve", "lilac", "purplish"], "purplish": ["purple", "purplish", "violet", "lavender", "plum", "magenta"], "pink": ["pink", "rose", "salmon", "fuchsia", "blush", "magenta", "hot pink", "pinkish"], "pinkish": ["pink", "pinkish", "rose", "salmon", "fuchsia"], "brown": ["brown", "tan", "beige", "chocolate", "khaki", "chestnut", "coffee", "walnut", "brownish"], "brownish": ["brown", "brownish", "tan", "beige", "chocolate"], "gray": ["gray", "grey", "silver", "charcoal", "slate", "ash", "pewter", "grayish", "greyish"], "grey": ["gray", "grey", "silver", "charcoal", "slate", "ash", "pewter", "grayish", "greyish"], "grayish": ["gray", "grey", "grayish", "greyish", "silver", "charcoal"], "greyish": ["gray", "grey", "grayish", "greyish", "silver", "charcoal"], "silver": ["silver", "metallic", "chrome", "steel", "aluminum", "shiny", "reflective"], "gold": ["gold", "golden", "gilded", "brass", "amber"], "golden": ["gold", "golden", "gilded", "brass", "amber"], "bright": ["bright", "vivid", "vibrant", "neon", "fluorescent", "glowing", "luminous", "colorful"], "dark": ["dark", "dim", "shadow", "black", "night", "low light", "murky", "gloomy"], "colorful": ["colorful", "multicolor", "rainbow", "bright", "vivid", "vibrant", "varied colors"], # ═══════════════════════════════════════════ # DEPTH / DISTANCE / SPATIAL # ═══════════════════════════════════════════ "depth": ["depth", "near", "far", "close", "distant", "range", "obstacle", "clearance", "distance", "proximity"], "close": ["near", "close range", "close", "immediate", "proximity", "adjacent", "beside", "nearby"], "far": ["far", "distant", "remote", "away", "long range", "horizon"], "obstacle": ["wall", "door", "furniture", 
"chair", "table", "close range", "near", "barrier", "blocked", "obstruction", "fence", "gate"], "distance": ["near", "far", "close", "distant", "range", "meters", "feet", "depth", "proximity"], "height": ["tall", "high", "elevated", "above", "overhead", "ceiling", "tower", "roof", "floor"], "crowded": ["crowded", "packed", "busy", "congested", "full", "dense", "many people", "occupied"], "empty": ["empty", "vacant", "clear", "unoccupied", "deserted", "abandoned", "bare", "hollow"], # ═══════════════════════════════════════════ # AUDIO EVENTS / SOUNDS # ═══════════════════════════════════════════ "noise": ["engine", "grinding", "mechanical", "hissing", "scraping", "clanking", "buzzing", "humming", "rattling", "banging", "crashing"], "speech": ["speech", "talking", "speaking", "voice", "conversation", "dialogue", "words", "verbal", "announcement", "narration"], "music": ["music", "singing", "song", "melody", "instrument", "playing", "beats", "rhythm", "piano", "guitar", "drum", "bass"], "alarm": ["alarm", "siren", "beep", "alert", "warning", "horn", "buzzer", "ring"], "scream": ["scream", "screaming", "shriek", "yell", "shout", "cry", "help", "distress"], "gunshot": ["gunshot", "gunfire", "shooting", "bang", "shot", "firearm", "bullet"], "engine": ["engine", "motor", "revving", "idling", "mechanical", "vehicle", "car engine", "machine"], "glass": ["glass", "shatter", "breaking", "smash", "crack", "broken glass"], "footsteps": ["footsteps", "walking", "running", "steps", "march", "stomp", "pacing"], "barking": ["barking", "bark", "dog", "growl", "howl", "yelp", "whine"], "crying": ["crying", "cry", "sobbing", "weeping", "whimpering", "wailing", "tears"], "laughter": ["laughter", "laughing", "chuckle", "giggle", "funny", "comedy"], "thunder": ["thunder", "lightning", "storm", "rumble", "boom"], "knock": ["knock", "knocking", "door", "bang", "tap", "rapping"], "horn": ["horn", "honking", "beep", "car horn", "truck horn", "signal"], "whistle": ["whistle", "whistling", 
"wind", "referee", "train whistle", "signal"], "clapping": ["clapping", "applause", "clap", "ovation", "cheering"], "silence": ["quiet", "silent", "no sound", "calm", "peaceful", "still", "mute"], "loud": ["loud", "noisy", "deafening", "blaring", "booming", "roaring", "thunderous"], # ═══════════════════════════════════════════ # SECURITY & SURVEILLANCE # ═══════════════════════════════════════════ "security": ["guard", "camera", "surveillance", "monitor", "patrol", "fence", "gate", "badge", "uniform", "checkpoint", "authorized"], "authorized": ["authorized", "identified", "recognized", "known", "verified", "approved", "cleared", "valid"], "unauthorized": ["unauthorized", "unknown", "unrecognized", "stranger", "intruder", "trespassing", "invalid", "denied"], "patrol": ["walking", "guard", "patrol", "route", "perimeter", "monitoring", "surveillance", "rounds"], "trespassing": ["trespassing", "intruder", "unauthorized", "fence", "gate", "boundary", "restricted", "prohibited"], "surveillance": ["camera", "monitor", "watching", "recording", "cctv", "footage", "surveillance", "tracking"], "perimeter": ["fence", "wall", "gate", "boundary", "border", "barrier", "perimeter", "edge"], # ═══════════════════════════════════════════ # MEDICAL / HEALTH # ═══════════════════════════════════════════ "injury": ["blood", "wound", "cut", "bruise", "broken", "injured", "hurt", "bandage", "first aid", "trauma"], "medical": ["hospital", "doctor", "nurse", "ambulance", "stretcher", "medicine", "pills", "injection", "stethoscope", "mask"], "unconscious": ["unconscious", "fainted", "collapsed", "lying", "unresponsive", "still", "fallen", "motionless"], "bleeding": ["blood", "bleeding", "wound", "cut", "injury", "red", "bandage"], "fall": ["fall", "fallen", "collapsed", "on ground", "trip", "stumble", "slip", "lying down", "tumble"], # ═══════════════════════════════════════════ # CLOTHING / APPEARANCE # ═══════════════════════════════════════════ "clothing": ["shirt", "pants", "jacket", 
"coat", "dress", "skirt", "suit", "uniform", "shoes", "boots", "hat", "cap", "vest", "hoodie", "sweater"], "shirt": ["shirt", "t-shirt", "top", "blouse", "polo", "jersey", "tank top"], "pants": ["pants", "jeans", "trousers", "shorts", "leggings", "sweatpants"], "jacket": ["jacket", "coat", "blazer", "hoodie", "sweater", "cardigan", "vest", "parka"], "shoes": ["shoes", "boots", "sneakers", "sandals", "heels", "slippers", "footwear"], "helmet": ["helmet", "hard hat", "safety helmet", "motorcycle helmet", "bike helmet"], # ═══════════════════════════════════════════ # FOOD / KITCHEN # ═══════════════════════════════════════════ "food": ["food", "meal", "plate", "dish", "fruit", "vegetable", "meat", "bread", "rice", "pasta", "pizza", "burger", "sandwich", "snack", "dessert", "cake", "salad"], "drink": ["drink", "water", "coffee", "tea", "juice", "soda", "beer", "wine", "cup", "glass", "bottle", "mug"], # ═══════════════════════════════════════════ # SPORTS / FITNESS # ═══════════════════════════════════════════ "sports": ["ball", "soccer", "football", "basketball", "tennis", "baseball", "running", "swimming", "cycling", "boxing", "wrestling", "gym", "stadium", "field", "court"], "swimming": ["swimming", "pool", "water", "diving", "swimmer", "stroke", "float", "splash"], "boxing": ["boxing", "punching", "gloves", "ring", "fight", "knockout", "sparring"], # ═══════════════════════════════════════════ # TECHNOLOGY / ELECTRONICS # ═══════════════════════════════════════════ "screen": ["screen", "monitor", "display", "tv", "television", "phone", "tablet", "laptop", "computer"], "camera": ["camera", "lens", "photo", "photography", "recording", "video", "flash", "tripod"], "robot": ["robot", "drone", "machine", "automated", "mechanical", "ai", "sensor"], "drone": ["drone", "quadcopter", "uav", "flying", "aerial", "remote control", "propeller"], # ═══════════════════════════════════════════ # MATERIALS / SURFACES # ═══════════════════════════════════════════ "metal": ["metal", 
"steel", "iron", "aluminum", "copper", "brass", "chrome", "metallic", "shiny"], "wood": ["wood", "wooden", "timber", "plank", "board", "log", "oak", "pine", "mahogany"], "glass": ["glass", "window", "transparent", "mirror", "reflection", "crystal", "pane"], "fabric": ["fabric", "cloth", "textile", "cotton", "silk", "polyester", "linen", "wool", "leather"], "concrete": ["concrete", "cement", "stone", "brick", "pavement", "asphalt", "gravel"], "plastic": ["plastic", "polymer", "synthetic", "container", "wrap", "packaging"], # ═══════════════════════════════════════════ # LIGHT / VISIBILITY # ═══════════════════════════════════════════ "light": ["light", "bright", "lamp", "bulb", "flashlight", "spotlight", "illuminated", "glowing", "lit"], "shadow": ["shadow", "dark", "dim", "shade", "silhouette", "backlit", "contrast"], "reflection": ["reflection", "mirror", "glass", "shiny", "glossy", "polished", "reflective"], "fog": ["fog", "mist", "haze", "smog", "cloudy", "visibility", "obscured", "blurry"], # ═══════════════════════════════════════════ # QUANTITY / COUNT # ═══════════════════════════════════════════ "many": ["many", "multiple", "several", "group", "crowd", "numerous", "various", "lots"], "few": ["few", "couple", "pair", "some", "handful"], "single": ["single", "one", "alone", "solo", "individual", "lone", "solitary"], "none": ["none", "empty", "no", "zero", "absent", "missing", "not found", "not detected"], } def caption_checkmate(self, mission_prompt: str, specialist_captions: List[Dict[str, str]]) -> Dict[str, Any]: """ Smart Checkmate: Reads ALL specialist model captions to verify mission objectives. 1. Split prompt into individual objectives 2. For each objective, scan ALL captions for keyword/semantic match 3. 
def caption_checkmate(self, mission_prompt: str, specialist_captions: List[Dict[str, str]]) -> Dict[str, Any]:
    """
    Smart Checkmate: read ALL specialist model captions to verify mission objectives.

    1. Split the prompt into individual objectives
    2. For each objective, scan every usable caption for a keyword/semantic match
    3. Return a checklist with one entry per objective plus the supporting evidence

    Args:
        mission_prompt: The user's original mission text.
        specialist_captions: List of dicts such as
            {"model": "color_expert", "caption": "Dominant colors: red (23%)"}.
            The text is taken from "caption", then "status", then "explanation".
            Entries whose text is empty or contains "unavailable" are ignored.

    Returns:
        {
            "mission_status": "achieved" | "partially_achieved" | "searching",
            "score": float (0.0-1.0),
            "objectives": [{"text": ..., "satisfied": bool,
                            "evidence": str | None, "matched_by": str | None}],
            "status_message": str,
            "satisfied": bool   # True as soon as ANY objective is matched
        }
    """
    if not mission_prompt or not specialist_captions:
        # Same key set as the normal path so callers can read "satisfied" unconditionally.
        return {
            "mission_status": "searching",
            "score": 0.0,
            "objectives": [],
            "status_message": "Awaiting specialist reports...",
            "satisfied": False,
        }

    # --- STEP 1: Split prompt into objectives ---
    objectives = self._split_objectives(mission_prompt)
    logger.info("[SMART CHECKMATE] Parsed %d objectives from prompt: %s", len(objectives), objectives)

    # --- STEP 2: Build the searchable caption entries ---
    caption_entries = []
    for cap in specialist_captions:
        # Specialists report their text under 'caption', 'status' or 'explanation'.
        text = cap.get("caption") or cap.get("status") or cap.get("explanation") or ""
        if not isinstance(text, str):
            # A structured payload would otherwise crash on .lower().
            text = str(text)
        lowered = text.lower()
        if lowered and "unavailable" not in lowered:
            caption_entries.append({"model": cap.get("model", "unknown"), "text": lowered})

    # --- STEP 3: Match each objective against the captions ---
    results = []
    for obj_text in objectives:
        matched, evidence, matched_by = self._match_objective(obj_text, caption_entries)
        results.append({
            "text": obj_text,
            "satisfied": matched,
            "evidence": evidence,
            "matched_by": matched_by,
        })

    # --- STEP 4: Compute overall status ---
    satisfied_count = sum(1 for r in results if r["satisfied"])
    total = len(results)
    score = satisfied_count / total if total > 0 else 0.0

    if satisfied_count == total and total > 0:
        mission_status = "achieved"
        status_message = f"→ CHECKMATE: All {total} objectives verified by specialist reports."
    elif satisfied_count > 0:
        mission_status = "partially_achieved"
        status_message = f"Mission {int(score * 100)}% complete: {satisfied_count}/{total} objectives verified."
    else:
        mission_status = "searching"
        status_message = f"Monitoring... 0/{total} objectives detected so far."

    logger.info("[SMART CHECKMATE] Result: %s (%d/%d)", mission_status, satisfied_count, total)

    return {
        "mission_status": mission_status,
        "score": round(score, 2),
        "objectives": results,
        "status_message": status_message,
        "satisfied": satisfied_count > 0,  # ANY match = partially satisfied
    }
elif satisfied_count > 0: mission_status = "partially_achieved" status_message = f"Mission {int(score * 100)}% complete: {satisfied_count}/{total} objectives verified." else: mission_status = "searching" status_message = f"Monitoring... 0/{total} objectives detected so far." logger.info(f"[SMART CHECKMATE] Result: {mission_status} ({satisfied_count}/{total})") return { "mission_status": mission_status, "score": round(score, 2), "objectives": results, "status_message": status_message, "satisfied": satisfied_count > 0 # ANY match = partially satisfied } def _split_objectives(self, prompt: str) -> List[str]: """Split a mission prompt into granular objectives.""" # Normalize text = prompt.lower().strip().strip('"').strip("'") # Remove common prefixes for prefix in ["detect ", "find ", "look for ", "search for ", "monitor for ", "check for ", "identify "]: if text.startswith(prefix): text = text[len(prefix):] break # 1. Split on major separators: commas, semicolons, "and", "then", and "after" parts = re.split(r'[,;]|\band\b|\bthen\b|\bafter\b', text) # 2. Further split on "positional", "conditional", or "active" connectors to get granular items # e.g. "person wearing a red shirt holding a phone" -> [person, red shirt, phone] granular_parts = [] for p in parts: # Split on words that indicate separate attributes, conditions, or actions sub = re.split(r'\bor\b|\bnear\b|\bwith\b|\bwho\b|\bwearing\b|\bat\b|\bbeside\b|\bholding\b|\bcarrying\b|\busing\b|\bshowing\b|\bwalking\b|\brunning\b', p) granular_parts.extend(sub) # 3. 
Clean each granular part recursively final_parts = [] # Expanded list of tactical noise to strip COMMAND_VERBS = [ "detect", "find", "look for", "search for", "monitor for", "check for", "identify", "study", "watch", "observe", "scan", "report", "notify", "is there", "is", "there", "was", "were", "any sign of", "presence of", "evidence of", "show me", "also", "then", "please", "can you", "try to" ] ARTICLES = ["a ", "an ", "the ", "any ", "some ", "all ", "every "] for gp in granular_parts: cleaned = gp.strip() # Recursive cleaning: Keep stripping until no more noise is found at the start changed = True while changed: original = cleaned # Check for verbs/phrases for verb in COMMAND_VERBS: if cleaned.startswith(verb + " "): cleaned = cleaned[len(verb):].strip() elif cleaned.startswith(verb + "s "): # handles "detects", "checks" etc cleaned = cleaned[len(verb)+1:].strip() # Check for articles for article in ARTICLES: if cleaned.startswith(article): cleaned = cleaned[len(article):].strip() # Handle possessives or "sign of" trailing if cleaned.startswith("sign of "): cleaned = cleaned[8:].strip() if cleaned.startswith("signs of "): cleaned = cleaned[9:].strip() changed = (cleaned != original) if cleaned and len(cleaned) > 2: final_parts.append(cleaned) # Deduplicate while preserving order unique = [] seen = set() for p in final_parts: if p not in seen: seen.add(p) unique.append(p) return unique if unique else [text] def _match_objective(self, objective: str, caption_entries: List[Dict[str, str]]) -> tuple: """ Match a single objective against all specialist captions. KEY LOGIC: 1. Strip "Scene context: ..." from captions (it's base perception echo, not findings) 2. Check if the sentence containing the keyword is negated 3. 
Only match on POSITIVE findings from specialists Returns: (matched: bool, evidence: str | None, matched_by: str | None) """ obj_words = objective.lower().split() stop_words = {"a", "an", "the", "is", "are", "was", "were", "be", "been", "being", "in", "on", "at", "to", "for", "of", "with", "by", "from", "it", "if", "or", "not", "no", "what", "how", "sign", "signs", "near", "who", "which"} meaningful_words = [w for w in obj_words if w not in stop_words and len(w) > 2] # Negative conclusion patterns — if the caption matches any of these, skip it entirely negative_patterns = [ r"no target .* detected", r"no target .* match", r"no readable text found", r"no .* detected for mission", r"no .* found in frame", r"no .* match for mission", r"no significant .* detected", r"no clear .* detected", r"no human poses detected", r"no speech detected", r"no notable .* found", r"no .* identified", r"scanning.*specifically", r"searching for", r"model unavailable", r"unavailable", r"no significant findings", r"awaiting", r"processing", ] # Negation words (Disabled to ensure baseline verification for deadline) negation_words = [] # ── WEIGHTED AND MATCHING LOGIC ── # Goal: If targets exist (e.g. "blue"), they MUST match. # Categories (e.g. "color") are optional context. target_words = [w for w in meaningful_words if w not in CATEGORY_WORDS] context_words = [w for w in meaningful_words if w in CATEGORY_WORDS] # If no specific targets (e.g. prompt is just "detect color"), treat categories as targets if not target_words: target_words = context_words for entry in caption_entries: raw_caption = entry["text"].lower() model_name = entry["model"] # 1. Strip context echo caption_text = raw_caption scene_ctx_idx = caption_text.find("scene context:") if scene_ctx_idx != -1: caption_text = caption_text[:scene_ctx_idx].strip().rstrip(".") if not caption_text or len(caption_text) < 5: continue # 2. 
Check negative conclusion patterns if any(re.search(pattern, caption_text) for pattern in negative_patterns): continue # 3. Sentence-level Verification (with Negation Awareness) sentences = re.split(r'[.!?]+', caption_text) # Filter for sentences that ARE NOT negated positive_sentences = [] for s in sentences: s = s.strip() if not s: continue # If sentence contains a negation word, it's not a positive finding if any(re.search(fr"\b{re.escape(neg)}\b", s) for neg in negation_words): continue positive_sentences.append(s) # Combine positive findings and sanitize special characters (slashes/parentheses) # This allows "standing/active" to match "standing" and "person(s)" to match "person" verified_context = " ".join(positive_sentences) verified_context = verified_context.replace("/", " ").replace("(", " ").replace(")", " ") if not verified_context: continue # Verify ALL mandatory target words exist all_targets_verified = True UTILITY_WORDS = {"sign", "evidence", "monitor", "detect", "finding", "presence", "detection", "check", "monitor"} for word in target_words: # Skip utility words that don't appear in technical specialist reports if word in UTILITY_WORDS: continue word_found = False # Literal check if re.search(fr"\b{re.escape(word)}\b", verified_context): word_found = True # Semantic expansion check (using the new Bidirectional Mirror) elif word in self.concept_mirror: for kw in self.concept_mirror[word]: if re.search(fr"\b{re.escape(kw)}\b", verified_context): word_found = True break if not word_found: all_targets_verified = False break if all_targets_verified: # Positive match found in this report! 
class BufferManager:
    """Manages session-specific observation buffers."""

    def __init__(self, window_seconds: float = 10.0):
        self.window_seconds = window_seconds
        # session_id -> ObservationBuffer, created lazily by get_buffer()
        self.buffers: Dict[str, "ObservationBuffer"] = {}

    def get_buffer(self, session_id: str) -> "ObservationBuffer":
        """Return the buffer for session_id, creating it on first use."""
        if session_id not in self.buffers:
            self.buffers[session_id] = ObservationBuffer(window_seconds=self.window_seconds)
        return self.buffers[session_id]

    def clear_session(self, session_id: str):
        """Drop the buffer for session_id; unknown sessions are ignored."""
        self.buffers.pop(session_id, None)


# --- STAGE 5: EXECUTIVE CONTROL (Mission Supervisor) ---
class MissionSupervisor:
    """
    Adaptive controller that monitors mission progress and manages retries.
    Implements the "Decision-Ready" logic engine.

    Per session it tracks how many retries were issued and which strategies were
    already proposed. Both are reset when the mission is achieved or inactive.
    """

    MAX_ATTEMPTS = 3

    # Strategy catalog: {failure_type: [(strategy_name, success_prob, resource_cost), ...]}
    _STRATEGY_CATALOG = {
        "low_light": [
            ("enable_thermal", 0.9, 4),
            ("increase_gain", 0.6, 1),
            ("lower_thresholds", 0.2, 0),
        ],
        "high_noise": [
            ("switch_to_visual_only", 0.7, 1),
            ("noise_gating", 0.5, 2),
        ],
        "low_confidence_match": [
            ("switch_to_specialist", 0.8, 2),
            ("zoom_in", 0.5, 1),
        ],
        "high_perception_uncertainty": [
            ("active_sensing_sweep", 0.8, 3),  # Multi-model cross-check
            ("widen_fov", 0.4, 1),
        ],
        "high_anticipatory_risk": [
            ("increase_frame_rate", 0.7, 5),  # Hardware intensive
            ("switch_to_specialist", 0.9, 2),
        ],
        "temporal_stagnation": [
            ("widen_fov", 0.4, 1),
            ("lower_semantic_thresholds", 0.3, 1),
        ],
        "target_not_found": [
            ("widen_fov", 0.5, 1),
            ("request_human_clarification", 1.0, 10),  # Human is high cost!
        ],
    }
    # Used when the failure type has no catalog entry
    _FALLBACK_STRATEGIES = [("lower_thresholds", 0.2, 0)]

    def __init__(self, evaluator: "MissionEvaluator"):
        self.evaluator = evaluator
        self.attempts = {}  # session_id -> current_attempt_count
        self.history = {}   # session_id -> list of tried strategies

    @staticmethod
    def _world_state_dict(world_state: Any) -> Any:
        """Return world_state.to_dict() when available, else world_state itself."""
        return world_state.to_dict() if hasattr(world_state, "to_dict") else world_state

    def evaluate_and_supervise(self, session_id: str, world_state: Any) -> Dict[str, Any]:
        """
        Stage 4 & 5 combined.

        1. Evaluate the mission (Stage 4) using the world state
        2. Attach an early warning when any entity's predicted risk score exceeds 0.75
        3. If the mission is not achieved, diagnose the failure and propose the next
           strategy (Stage 5), up to MAX_ATTEMPTS times per session

        Returns:
            The evaluator's result dict, possibly extended with "early_warning",
            "next_strategy" and "attempt", and with "mission_status" rewritten to
            "retrying" or "failed".
        """
        result = self.evaluator.evaluate(world_state)

        # ── PHASE 5: Anticipatory Early Warning ──
        # Even if the mission is not achieved, a high risk injects a warning
        ws_dict = self._world_state_dict(world_state)
        high_risk_entities = [
            e for e in ws_dict.get("entities", [])
            if (e.get("prediction") or {}).get("risk_score", 0) > 0.75
        ]
        if high_risk_entities:
            result["early_warning"] = {
                "level": "critical",
                "message": f"Anticipatory threat forming! {len(high_risk_entities)} subjects showing risky behavior.",
                "details": [e["prediction"] for e in high_risk_entities],
            }

        # If achieved, no mission, or status is none, reset supervisor.
        # .get() keeps a result that lacks one of the keys from raising KeyError.
        if (result.get("mission_status") in ("achieved", "none")
                or result.get("status_message") == "No active mission."):
            self.attempts[session_id] = 0
            self.history[session_id] = []
            return result

        # Stage 5: Adaptive Retry Logic
        current_attempt = self.attempts.get(session_id, 0)
        if current_attempt < self.MAX_ATTEMPTS:
            tried = self.history.setdefault(session_id, [])
            failure_type = self._diagnose_failure(world_state)
            strategy = self._select_strategy(failure_type, tried)
            tried.append(strategy)
            self.attempts[session_id] = current_attempt + 1

            result["mission_status"] = "retrying"
            result["next_strategy"] = strategy
            result["attempt"] = self.attempts[session_id]
            logger.info("[SUPERVISOR] Attempt %s failed. Diagnosis: %s. Strategy: %s",
                        result["attempt"], failure_type, strategy)
        else:
            result["mission_status"] = "failed"
            # Derived from MAX_ATTEMPTS so the message cannot drift from the limit.
            result["status_message"] = (
                f"Mission failed after {self.MAX_ATTEMPTS} attempts. Target not found or unreachable."
            )
            logger.warning("[SUPERVISOR] Mission FAILED for session %s.", session_id)

        return result

    def _diagnose_failure(self, world_state: Any) -> str:
        """
        Situational Diagnostic Engine: why is the mission stalling?

        Checks are ordered; the first one that applies decides the returned
        failure type. Falls back to "target_not_found".
        """
        ws_dict = self._world_state_dict(world_state)
        entities = ws_dict.get("entities", [])

        # 1. Lighting & Environment
        if ws_dict.get("lighting") == "low":
            return "low_light"
        if ws_dict.get("noise_level", 0.0) > 0.6:
            return "high_noise"

        # 2. Target Specific Issues (Risk & Uncertainty), judged on the most confident entity
        if entities:
            best_e = max(entities, key=lambda e: e.get("confidence", 0))
            pred = best_e.get("prediction") or {}

            if pred.get("uncertainty", 0) > 0.6:
                return "high_perception_uncertainty"
            if pred.get("risk_score", 0) > 0.6:
                return "high_anticipatory_risk"
            if best_e.get("confidence", 0) < 0.4:
                return "low_confidence_match"

            # 3. Temporal stagnation: more than five history records whose last
            #    five all share the same state
            history = best_e.get("history", [])
            if len(history) > 5:
                states = [h.get("state") for h in history[-5:]]
                if all(s == states[0] for s in states):
                    return "temporal_stagnation"

        # 4. Sensor Issues
        if ws_dict.get("target_status") == "occluded":
            return "occlusion"

        return "target_not_found"

    def _select_strategy(self, failure_type: str, history: List[str]) -> str:
        """
        ROI-driven strategy selection: pick the best success/cost trade-off.

        Args:
            failure_type: Diagnosis label produced by _diagnose_failure.
            history: Strategy names already proposed for this session.

        Returns:
            The untried strategy with the highest success_prob - 0.1 * cost, or
            "human_intervention_required" when every candidate was already tried.
        """
        candidates = self._STRATEGY_CATALOG.get(failure_type, self._FALLBACK_STRATEGIES)

        # Filter out already tried strategies
        untried = [s for s in candidates if s[0] not in history]
        if not untried:
            return "human_intervention_required"

        # High success but low cost wins; on a tie the earlier catalog entry is kept.
        best = max(untried, key=lambda s: s[1] - (s[2] * 0.1))
        return best[0]
# Singleton instances (each created exactly once; the supervisor shares the evaluator)
mission_evaluator = MissionEvaluator()
buffer_manager = BufferManager()
mission_supervisor = MissionSupervisor(mission_evaluator)