| import time |
| import logging |
| import re |
| from typing import List, Dict, Any, Optional |
| from collections import deque |
| from dataclasses import dataclass, field, asdict |
|
|
| logger = logging.getLogger(__name__) |
|
|
@dataclass
class ObservationEvent:
    """One structured detection/event record shared by every producer.

    The dataclass performs no validation; it only carries the fields below.
    """

    # Event time as a float. ObservationBuffer compares it with time.time(),
    # so buffered events are expected to carry epoch seconds.
    timestamp: float
    # Event kind label (the field name shadows the builtin, but it is part of
    # the public schema and therefore kept).
    type: str
    # Free-form payload for the event.
    data: Dict[str, Any]
    # Name of whatever produced the event.
    source: str
    # Producer confidence; defaults to full confidence when omitted.
    confidence: float = 1.0
|
|
@dataclass
class MissionRule:
    """A single machine-readable requirement of a mission."""

    # Rule family; the parser in this file emits "object", "audio" or "speech".
    domain: str
    # Label the evaluator searches for in the world state.
    target: str
    # Condition keyword; only the default "exists" appears in this file.
    condition: str = "exists"
    # Minimum detection confidence accepted for a match.
    threshold: float = 0.4
    # Required attribute values (for example {"color": "red"}).
    attributes: Dict[str, str] = field(default_factory=dict)
    # Requested minimum number of instances.
    min_count: int = 1
    # Free-form bookkeeping, e.g. which parser created the rule.
    metadata: Dict[str, Any] = field(default_factory=dict)
|
|
| |
|
|
| |
# Prompt word -> canonical object label. The deterministic parser uses the
# canonical label as MissionRule.target for "object" rules.
# NOTE(review): labels such as "cell phone" / "dining table" look like detector
# class names — confirm against the detector's label set.
OBJECT_VOCAB = {
    # People
    "person": "person", "people": "person",
    "man": "person", "woman": "person",
    "boy": "person", "girl": "person",
    "child": "person", "kid": "person",
    "human": "person", "individual": "person", "subject": "person",

    # Vehicles
    "car": "car", "vehicle": "car", "automobile": "car",
    "truck": "truck", "lorry": "truck",
    "bus": "bus",
    "motorcycle": "motorcycle", "motorbike": "motorcycle",
    "bike": "bicycle",
    "bicycle": "bicycle",
    "boat": "boat", "ship": "boat",
    "airplane": "airplane", "plane": "airplane", "aircraft": "airplane",

    # Animals
    "cat": "cat", "kitten": "cat", "feline": "cat",
    "dog": "dog", "puppy": "dog", "canine": "dog",
    "bird": "bird",
    "horse": "horse",
    "cow": "cow", "cattle": "cow",
    "sheep": "sheep",
    "elephant": "elephant",
    "bear": "bear",

    # Weapons (generic "weapon" and "sword" both resolve to "knife")
    "knife": "knife", "blade": "knife",
    "gun": "gun", "pistol": "gun", "rifle": "gun", "firearm": "gun",
    "weapon": "knife",
    "sword": "knife",

    # Everyday objects, fire, and clothing
    "phone": "cell phone", "cellphone": "cell phone", "mobile": "cell phone",
    "laptop": "laptop", "computer": "laptop",
    "backpack": "backpack", "bag": "backpack",
    "umbrella": "umbrella",
    "bottle": "bottle",
    "chair": "chair",
    "table": "dining table", "desk": "dining table",
    "tv": "tv", "television": "tv", "monitor": "tv",
    "book": "book",
    "clock": "clock",
    "fire": "fire", "flame": "fire",
    "headphones": "headphones", "headphone": "headphones",
    "glasses": "glasses", "eyeglasses": "glasses",
    "shirt": "shirt", "tshirt": "shirt", "top": "shirt",
    "shoe": "shoe", "shoes": "shoe", "boot": "shoe",
    "pant": "pant", "pants": "pant", "jeans": "pant",
    "suit": "suit", "jacket": "suit", "coat": "suit",
}
|
|
| |
# Prompt word -> canonical sound label. The deterministic parser uses the
# canonical label as MissionRule.target for "audio" rules.
AUDIO_VOCAB = {
    # Loud / alarming sounds
    "gunshot": "gunshot", "gunfire": "gunshot", "shooting": "gunshot",
    "explosion": "explosion", "blast": "explosion", "bang": "explosion",
    "scream": "scream", "screaming": "scream", "yelling": "scream",
    "siren": "siren", "alarm": "siren",

    # Voices and music
    "speech": "speech", "talking": "speech", "conversation": "speech",
    "music": "music", "singing": "music", "song": "music",

    # Animals, machines, movement
    "barking": "barking", "bark": "barking",
    "engine": "engine", "motor": "engine",
    "footsteps": "footsteps", "walking": "footsteps", "running": "footsteps",
    "glass_break": "glass_break", "shatter": "glass_break",
    "crying": "crying", "cry": "crying", "sobbing": "crying",
    "horn": "horn", "honking": "horn",

    # Weather
    "thunder": "thunder",
    "rain": "rain",
    "wind": "wind",
}
|
|
| |
# Words the deterministic parser treats as a colour attribute; the first one
# found in a prompt is attached to every object rule as attributes["color"].
COLOR_VOCAB = {
    # Hues
    "black", "white", "red", "blue", "green", "yellow", "orange",
    "purple", "pink", "brown", "gray", "grey", "silver", "gold",
    # Shade / brightness words
    "dark", "light", "bright",
}
|
|
| |
# Quantity word -> minimum instance count. The deterministic parser applies the
# value as MissionRule.min_count (on "person" rules only).
COUNT_VOCAB = {
    # Vague quantities
    "group": 3,
    "crowd": 5,
    "many": 3,
    "several": 3,
    "few": 2,
    "couple": 2,
    "pair": 2,
    "multiple": 2,
    # Number words
    "two": 2,
    "three": 3,
    "four": 4,
    "five": 5,
}
|
|
| |
# Generic category nouns/verbs (e.g. "object", "sound", "detect").
# NOTE(review): nothing in the visible part of this file reads this set —
# confirm where it is consumed before changing it.
CATEGORY_WORDS = {
    "color", "colors",
    "sound", "sounds",
    "noise", "noises",
    "object", "objects",
    "item", "items",
    "thing", "things",
    "activity", "event", "action",
    "sign", "signal", "sense",
    "perceive", "detect", "finding", "report",
}
|
|
| |
# Words that signal the user wants spoken content transcribed. Every key maps
# to the single action "transcribe"; the deterministic parser emits one
# "speech" rule when any of them appears in the prompt.
SPEECH_VOCAB = dict.fromkeys(
    (
        "say", "said", "saying",
        "speak", "speaking", "spoken",
        "tell", "telling", "told",
        "transcribe", "transcript", "transcription",
        "voice", "voices",
        "words", "word",
        "language", "dialogue", "dialog",
        "quote", "mention",
        "shout", "whisper", "yell",
        "call", "called", "calling",
    ),
    "transcribe",
)
|
|
| |
|
|
| |
# Rule domain -> names of the models route_models() schedules for that domain.
DOMAIN_MODEL_MAP = {
    "object": ["git_base", "yolo"],
    "audio": ["wavcap"],
    "speech": ["whisper"],
}


# Models route_models() returns when there are no mission rules at all.
DEFAULT_MODELS = {"git_base", "wavcap"}
|
|
|
|
def route_models(rules: List[MissionRule]) -> set:
    """Choose which models have to run for a parsed mission.

    With no rules, a fresh copy of DEFAULT_MODELS is returned. Otherwise the
    result is the union of the DOMAIN_MODEL_MAP entries for every rule's
    domain; a domain missing from the map contributes no models.
    """
    if rules:
        models = set().union(
            *(DOMAIN_MODEL_MAP.get(rule.domain, []) for rule in rules)
        )
        logger.info(f"[ROUTER] Domains: {set(r.domain for r in rules)} → Models: {models}")
        return models

    # Copy so callers can mutate the result without touching the constant.
    return DEFAULT_MODELS.copy()
|
|
|
|
def parse_mission(user_prompt: str) -> List[MissionRule]:
    """Hybrid mission interpreter: keyword rules plus an optional LLM rule.

    The prompt is first parsed by ``_parse_mission_deterministic``. The
    reasoning engine is then asked for an intent; if it names a target that
    no deterministic rule already covers, one extra rule is appended with
    ``metadata={"source": "semantic_llm"}``.

    Args:
        user_prompt: Free-text mission description.

    Returns:
        The mission rules. A blank prompt yields ``[]`` without consulting
        the reasoning engine. If the engine cannot be imported, raises, or
        returns something unusable, the deterministic rules are returned
        unchanged.
    """
    # A blank prompt means "no mission": do not let the LLM invent a target.
    if not user_prompt or not user_prompt.strip():
        return []

    rules = _parse_mission_deterministic(user_prompt)

    logger.info(f"[INTERPRETER] Performing semantic scan for '{user_prompt}'...")
    try:
        # Kept function-local as in the original code.
        # NOTE(review): presumably to avoid an import cycle or load cost — confirm.
        from reasoning_engine import reasoning_engine
        intent = reasoning_engine.interpret_mission(user_prompt)
    except Exception:
        # The semantic pass is supplementary; never lose the keyword rules
        # because the engine is missing or failed.
        logger.exception("[INTERPRETER] Semantic scan failed; using deterministic rules only.")
        return rules

    if not isinstance(intent, dict):
        logger.warning("[INTERPRETER] Ignoring non-dict intent of type %s.", type(intent).__name__)
        return rules

    target = intent.get("target")
    if not isinstance(target, str):
        return rules
    target = target.strip()
    wanted = target.lower()
    # The engine may spell "no target" as an empty string or the words none/null.
    if not wanted or wanted in ("none", "null"):
        return rules

    # Skip the injection when a deterministic rule already names this target.
    if any(rule.target.lower() == wanted for rule in rules):
        return rules

    attrs = intent.get("attributes")
    if not isinstance(attrs, dict):
        attrs = {}

    # Later checks win: "speech" overrides "audio", which overrides "object".
    caps_text = str(intent.get("capabilities", [])).lower()
    domain = "object"
    if "audio" in caps_text:
        domain = "audio"
    if "speech" in caps_text:
        domain = "speech"

    rules.append(MissionRule(
        domain=domain,
        target=target,
        attributes=attrs,
        threshold=0.35,
        metadata={"source": "semantic_llm"},
    ))
    logger.info(f"[SEMANTIC INJECTION] Created dynamic rule for target: '{target}'")
    return rules
|
|
def _parse_mission_deterministic(user_prompt: str) -> List[MissionRule]:
    """Convert a free-text prompt into MissionRules using keyword tables only.

    Matching order: object words, then audio words, then a single speech
    rule. If nothing matched, two-word phrases are tried against
    OBJECT_VOCAB, and finally the last non-trivial word of the prompt is
    used as a raw object target with a lower threshold.

    Args:
        user_prompt: Free-text mission description.

    Returns:
        The rules found, without duplicate targets per domain. A blank
        prompt yields ``[]``.
    """
    if not user_prompt or not user_prompt.strip():
        return []

    text = user_prompt.lower().strip()
    # Punctuation becomes whitespace so "knife," or "t-shirt" split into words.
    words = re.sub(r'[^\w\s]', ' ', text).split()

    def lookup(word: str, vocab: dict) -> Optional[str]:
        """Return the canonical label for *word* or its naive singular."""
        if word in vocab:
            return vocab[word]
        # Naive plural stripping. The stem must itself be a vocabulary key, so
        # short plurals ("cars", "dogs", "guns", "tvs") are safe to accept; the
        # earlier `len(word) > 4` guard silently rejected all of them.
        for suffix in ("es", "s"):
            if word.endswith(suffix):
                stem = word[:-len(suffix)]
                if len(stem) >= 2 and stem in vocab:
                    return vocab[stem]
        return None

    rules: List[MissionRule] = []

    colors_found = [w for w in words if w in COLOR_VOCAB]

    # Only the first quantity word in the prompt is honoured.
    min_count = max(1, next((COUNT_VOCAB[w] for w in words if w in COUNT_VOCAB), 1))

    # Objects: one rule per canonical label, in order of first appearance.
    found_objects = set()
    for word in words:
        canonical = lookup(word, OBJECT_VOCAB)
        if not canonical or canonical in found_objects:
            continue
        found_objects.add(canonical)
        rules.append(MissionRule(
            domain="object",
            target=canonical,
            # The first colour in the prompt is attached to every object rule.
            attributes={"color": colors_found[0]} if colors_found else {},
            # Quantity words only apply to people.
            min_count=min_count if canonical == "person" else 1,
            threshold=0.4,
        ))

    # Sounds: one rule per canonical label.
    found_audio = set()
    for word in words:
        canonical = lookup(word, AUDIO_VOCAB)
        if not canonical or canonical in found_audio:
            continue
        found_audio.add(canonical)
        rules.append(MissionRule(
            domain="audio",
            target=canonical,
            threshold=0.5,
        ))

    # Speech: at most one transcription rule, however many cue words appear.
    if any(lookup(word, SPEECH_VOCAB) for word in words):
        rules.append(MissionRule(
            domain="speech",
            target="transcribe",
            threshold=0.3,
        ))

    # Fallback 1: two-word phrases. OBJECT_VOCAB currently has no multi-word
    # keys, so this only matters once such entries are added.
    if not rules:
        for first, second in zip(words, words[1:]):
            canonical = OBJECT_VOCAB.get(f"{first} {second}")
            if canonical is not None and canonical not in found_objects:
                found_objects.add(canonical)
                rules.append(MissionRule(domain="object", target=canonical))

    # Fallback 2: treat the last meaningful word as a raw object target.
    if not rules:
        filler = {"the", "for", "any", "sign", "find", "detect", "there", "with", "and", "are", "was", "has", "have"}
        last_word = next(
            (w for w in reversed(words) if len(w) > 2 and w not in filler),
            None,
        )
        if last_word is not None:
            rules.append(MissionRule(domain="object", target=last_word, threshold=0.3))

    logger.info(f"[MISSION PARSER] '{user_prompt}' → {len(rules)} rules: {[(r.domain, r.target, r.attributes) for r in rules]}")
    return rules
|
|
|
|
class ObservationBuffer:
    """Rolling buffer that keeps only events from the last few seconds.

    Events are compared against ``time.time()``, so their ``timestamp`` is
    expected to be in epoch seconds. Expiry is checked from the oldest
    (left) end only, i.e. events are assumed to be added in time order.
    """

    def __init__(self, window_seconds: float = 10.0):
        """Create an empty buffer that retains *window_seconds* of events."""
        self.window_seconds = window_seconds
        self.buffer: deque[ObservationEvent] = deque()

    def add(self, event: ObservationEvent) -> None:
        """Append *event*, then drop whatever has aged out of the window."""
        self.buffer.append(event)
        self._prune()

    def _prune(self) -> None:
        """Pop events from the left while they are older than the window."""
        now = time.time()
        while self.buffer and (now - self.buffer[0].timestamp > self.window_seconds):
            self.buffer.popleft()

    def get_all(self) -> List[ObservationEvent]:
        """Return a list copy of the events currently inside the window.

        Expired events are pruned first; without this, a reader would keep
        receiving stale events until the next ``add()`` call.
        """
        self._prune()
        return list(self.buffer)
|
|
|
|
| class MissionEvaluator: |
| """Deterministic logic engine for matching rules against observations.""" |
def __init__(self):
    """Start with no active rules and precompute the mirrored concept map."""
    self.active_rules: List[MissionRule] = []

    # Derived once from the class-level CONCEPT_MAP table.
    concept_table = self.CONCEPT_MAP
    self.concept_mirror = self._mirror_concept_map(concept_table)
|
|
def _mirror_concept_map(self, original_map: Dict[str, List[str]]) -> Dict[str, set]:
    """Build a bidirectional, self-referential view of a concept map.

    Every key and every synonym becomes a key of the result. Each word maps
    to the union of all groups (key plus its synonyms) in which it appears,
    so if 'person' lists 'human', then 'human' also maps to 'person'.

    Args:
        original_map: Concept name -> list of related words.

    Returns:
        Word -> set of related words (always including the word itself).
        Every value is an independent set. Sharing one set object between a
        key and its synonyms would let a later group leak into unrelated
        earlier entries, depending on dict order.
    """
    mirrored: Dict[str, set] = {}

    for key, synonyms in original_map.items():
        group = set(synonyms)
        group.add(key)

        # Key first, then synonyms in list order, to keep insertion order stable.
        for word in (key, *synonyms):
            mirrored.setdefault(word, set()).update(group)

    logger.info(f"[EVALUATOR] Semantic Mirroring complete. Expanded {len(original_map)} concepts into {len(mirrored)} bidirectional links.")
    return mirrored
|
|
def set_rules(self, rules: List[MissionRule]):
    """Replace the active mission with *rules*.

    The list is stored by reference (not copied), so later mutations made
    by the caller are visible to the evaluator.
    """
    self.active_rules = rules
|
|
def evaluate(self, world_state: Any) -> Dict[str, Any]:
    """Check every active rule against the current world state.

    Args:
        world_state: Object whose ``entities`` attribute holds the current
            entity dicts; a missing or ``None`` attribute is treated as
            "no entities".

    Returns:
        A dict with the keys ``satisfied``, ``alerts``, ``status_message``,
        ``mission_status`` ("none", "ongoing" or "achieved"), ``timestamp``
        and ``score`` (fraction of rules matched). The mission counts as
        satisfied only when every active rule produced an alert.
    """
    rules = self.active_rules
    if not rules:
        return {
            "satisfied": False,
            "alerts": [],
            "status_message": "No active mission.",
            "mission_status": "none",
            "timestamp": time.time(),
            # Present in every result so callers can read it unconditionally.
            "score": 0,
        }

    entities = getattr(world_state, "entities", None) or []

    alerts = []
    for rule in rules:
        match = self._checkmate_rule(rule, entities, world_state)
        if match:
            alerts.append(match)

    total = len(rules)
    satisfied = len(alerts) >= total

    if satisfied:
        status_message = "→ SENTINEL CHECKMATE: Mission purpose fulfilled."
        mission_status = "achieved"
    else:
        status_message = f"Monitoring... {len(alerts)}/{total} requirements satisfied."
        mission_status = "ongoing"

    return {
        "satisfied": satisfied,
        "alerts": alerts,
        "status_message": status_message,
        "mission_status": mission_status,
        "timestamp": time.time(),
        "score": len(alerts) / total,
    }
|
|
def _checkmate_rule(self, rule: MissionRule, entities: List[Dict[str, Any]], world_state: Any) -> Optional[Dict[str, Any]]:
    """Return an alert dict for the first entity that satisfies *rule*.

    Two kinds of entity dict are handled:

    * Standard detections (``finding_type`` is not "specialist"): the type
      must share a word with the target (or equal it), the confidence must
      reach ``rule.threshold`` and every rule attribute must be contained
      in the entity's attribute of the same name.
    * Specialist findings: the target is compared with the finding's
      ``value`` (substring match in either direction) and searched for in
      its ``explanation``, unless a negation word appears in the 20
      characters before the first mention.

    For audio rules, ``acoustic_event`` entities whose label contains the
    target are accepted as a last resort.

    Args:
        rule: The requirement to verify.
        entities: Entity dicts from the world state (``None`` is tolerated).
        world_state: Unused here; kept for interface compatibility.

    Returns:
        An alert dict (``alert``, ``type``, ``message``, ``entity``,
        ``confidence``), or ``None`` when nothing matches.
    """
    # NOTE(review): rule.min_count is not enforced here; whether one entity
    # can stand for several instances is not visible in this file — confirm.
    target = rule.target.lower()
    if not target:
        # An empty target is a substring of everything and would match any finding.
        return None
    entities = entities or []
    target_words = set(target.replace("_", " ").split())
    negatives = ("no ", "not ", "none ", "zero ", "missing", "unavailable", "0 ")

    for entity in entities:
        e_type = str(entity.get("type", "")).lower()
        e_attrs = entity.get("attributes") or {}
        e_conf = entity.get("confidence", 0.0)

        if entity.get("finding_type") != "specialist":
            # --- Standard detection ---
            if len(e_type) < 2:
                continue
            e_type_words = set(e_type.replace("_", " ").split())
            if not (target_words & e_type_words) and target != e_type:
                continue
            if e_conf < rule.threshold:
                continue
            # str() guards against non-string attribute values (e.g. LLM output).
            if rule.attributes and not all(
                str(wanted).lower() in str(e_attrs.get(key, "")).lower()
                for key, wanted in rule.attributes.items()
            ):
                continue
            return {
                "alert": True, "type": "checkmate_match",
                "message": f"Verified target '{target}' in world state.",
                "entity": entity, "confidence": e_conf
            }

        # --- Specialist finding ---
        attr_value = str(e_attrs.get("value", "")).lower()
        # A missing/empty value must not match: "" is a substring of any target.
        if attr_value and (target in attr_value or attr_value in target):
            return {
                "alert": True, "type": "specialist_match",
                "message": f"Specialist confirmed: '{target}' detected ({attr_value}).",
                "entity": entity, "confidence": e_conf
            }

        explanation = str(e_attrs.get("explanation", "")).lower()
        idx = explanation.find(target)
        if idx != -1:
            # Only the text just before the first mention is checked for negation.
            snippet = explanation[max(0, idx - 20):idx]
            if not any(neg in snippet for neg in negatives):
                return {
                    "alert": True, "type": "specialist_match",
                    "message": f"Specialist report contains reference to '{target}'.",
                    "entity": entity, "confidence": e_conf
                }

    if rule.domain == 'audio':
        for entity in entities:
            if entity.get("type") != "acoustic_event":
                continue
            e_label = str((entity.get("attributes") or {}).get("label", "")).lower()
            if target in e_label:
                # Same keys as the other alerts so consumers can treat them alike.
                return {
                    "alert": True, "type": "acoustic_match",
                    "message": f"Acoustic confirmed: {target}",
                    "entity": entity, "confidence": entity.get("confidence", 0.0)
                }

    return None
|
|
| |
| |
| |
| CONCEPT_MAP = { |
| |
| |
| |
| "danger": ["knife", "gun", "weapon", "fire", "flame", "blood", "fight", "alarm", "scream", "explosion", "threat", "attack", "violence", "aggressive", "sword", "pistol", "rifle", "crash", "collision", "smoke", "broken", "falling", "injured", "hazard", "threat"], |
| "threat": ["knife", "gun", "weapon", "fire", "blood", "fight", "alarm", "scream", "explosion", "attack", "violence", "aggressive", "intruder", "suspicious", "trespassing", "masked", "hooded", "danger", "hostile"], |
| "emergency": ["fire", "smoke", "alarm", "siren", "scream", "crash", "explosion", "injured", "blood", "ambulance", "fallen", "collapse", "unconscious", "drowning", "critical"], |
| "accident": ["crash", "collision", "fall", "fallen", "broken", "blood", "damage", "wreck", "injury", "injured", "ambulance", "fire", "smoke", "shattered", "debris", "impact", "overturned", "dent"], |
| "crime": ["knife", "gun", "weapon", "masked", "hooded", "stealing", "robbery", "intruder", "trespassing", "suspicious", "fight", "assault", "vandalism", "break-in", "threat"], |
| "weapon": ["knife", "gun", "pistol", "rifle", "sword", "blade", "firearm", "machete", "bat", "club", "axe", "hammer"], |
| "violence": ["fight", "fighting", "attack", "punch", "kick", "aggressive", "blood", "weapon", "assault", "struggle", "thrown", "hit", "slap"], |
| "intrusion": ["intruder", "trespassing", "unauthorized", "stranger", "suspicious", "break-in", "forced entry", "masked", "hooded", "sneaking"], |
| "suspicious": ["suspicious", "lurking", "hiding", "sneaking", "masked", "hooded", "loitering", "unusual", "strange", "watching"], |
| "hazard": ["fire", "smoke", "chemical", "spill", "leak", "gas", "electrical", "wire", "flooding", "slippery", "obstacle", "debris"], |
| "risk": ["knife", "gun", "fire", "fall", "height", "speed", "chemical", "explosion", "collision", "electrical", "drowning"], |
| |
| |
| |
| |
| "activity": ["walking", "running", "sitting", "standing", "holding", "moving", "talking", "eating", "drinking", "working", "playing", "reading", "typing", "cooking", "cleaning", "dancing", "exercising", "sleeping", "lying", "writing", "person", "man", "woman", "human", "subject", "individual", "boy", "girl", "child", "people"], |
| "movement": ["walking", "running", "moving", "jumping", "climbing", "crawling", "dancing", "jogging", "sprinting", "stepping", "marching", "pacing", "sliding"], |
| "interaction": ["talking", "speaking", "shaking hands", "hugging", "fighting", "pointing", "waving", "greeting", "kissing", "arguing", "collaborating"], |
| "working": ["typing", "writing", "computer", "laptop", "desk", "phone", "meeting", "office", "paperwork", "keyboard", "tool", "construction"], |
| "eating": ["eating", "food", "drinking", "cup", "glass", "plate", "fork", "spoon", "restaurant", "kitchen", "cooking", "meal", "snack", "chewing"], |
| "exercising": ["running", "jogging", "pushup", "jumping", "stretching", "yoga", "gym", "weights", "fitness", "workout", "training", "sport"], |
| "sleeping": ["sleeping", "lying", "bed", "resting", "napping", "pillow", "blanket", "unconscious", "eyes closed", "still"], |
| "cooking": ["cooking", "stove", "pan", "pot", "kitchen", "chopping", "stirring", "baking", "oven", "food", "ingredient"], |
| "cleaning": ["cleaning", "sweeping", "mopping", "wiping", "washing", "scrubbing", "vacuum", "broom", "dust", "spray"], |
| "reading": ["reading", "book", "newspaper", "magazine", "screen", "text", "letter", "document", "page"], |
| "driving": ["driving", "steering", "car", "vehicle", "road", "wheel", "dashboard", "seat", "traffic"], |
| "talking": ["talking", "speaking", "conversation", "voice", "phone", "call", "discussion", "chat", "dialogue"], |
| "speech": ["speaking", "talking", "conversation", "voice", "dialogue", "said", "spoke", "shouted", "whispered"], |
| "voice": ["speaking", "talking", "voice", "audio", "vocal", "speech", "shout", "whisper"], |
| "waiting": ["standing", "sitting", "still", "idle", "waiting", "stationary", "motionless", "paused"], |
| "running": ["running", "sprinting", "jogging", "moving fast", "chasing", "fleeing", "rushing"], |
| "fighting": ["fighting", "punching", "kicking", "wrestling", "struggle", "attack", "hit", "aggressive", "violent"], |
| |
| |
| |
| |
| "happy": ["smiling", "laughing", "smile", "joyful", "cheerful", "celebrating", "clapping", "excited"], |
| "sad": ["crying", "tears", "sobbing", "depressed", "frown", "head down", "mourning"], |
| "angry": ["aggressive", "shouting", "yelling", "fist", "fighting", "threatening", "confrontation", "furious"], |
| "scared": ["screaming", "running", "hiding", "trembling", "panicked", "frightened", "cowering"], |
| "calm": ["sitting", "standing", "still", "relaxed", "peaceful", "quiet", "resting"], |
| "confused": ["looking around", "scratching head", "lost", "disoriented", "wandering"], |
| "celebration": ["clapping", "cheering", "dancing", "jumping", "waving", "party", "balloons", "cake", "confetti"], |
| |
| |
| |
| |
| "human": ["human", "person", "man", "woman", "subject", "individual", "boy", "girl", "child", "people"], |
| "person": ["person", "man", "woman", "human", "subject", "individual", "boy", "girl", "child", "people", "face", "standing", "walking", "sitting"], |
| "man": ["man", "male", "guy", "boy", "gentleman", "person"], |
| "woman": ["woman", "female", "lady", "girl", "person"], |
| "child": ["child", "kid", "baby", "boy", "girl", "infant", "toddler", "young"], |
| "crowd": ["people", "group", "crowd", "gathering", "multiple", "several", "audience", "assembly", "line", "queue"], |
| "face": ["face", "facial", "biometric", "identity", "recognized", "detected face", "eyes", "mouth", "nose", "forehead"], |
| "identity": ["face", "facial", "biometric", "recognized", "identified", "known", "unknown", "stranger", "authorized"], |
| "stranger": ["unknown", "unrecognized", "unauthorized", "stranger", "unfamiliar", "not identified"], |
| "uniform": ["uniform", "vest", "helmet", "badge", "safety gear", "hi-vis", "reflective", "hardhat"], |
| "mask": ["mask", "masked", "face covering", "balaclava", "surgical mask", "respirator"], |
| |
| |
| |
| |
| "gesture": ["hand", "gesture", "pointing", "waving", "thumbs", "fist", "sign language", "beckoning", "raised hand", "peace sign", "ok sign"], |
| "pose": ["standing", "sitting", "lying", "crouching", "walking", "running", "active", "kneeling", "bending", "leaning", "squatting"], |
| "standing": ["standing", "upright", "erect", "vertical", "on feet", "stationary"], |
| "sitting": ["sitting", "seated", "chair", "bench", "cross-legged", "slouching"], |
| "lying": ["lying", "prone", "supine", "on ground", "fallen", "collapsed", "horizontal", "flat"], |
| "kneeling": ["kneeling", "on knees", "crouching", "bent", "bowing"], |
| "pointing": ["pointing", "directing", "indicating", "finger", "aimed", "showing"], |
| "waving": ["waving", "hand up", "greeting", "flagging", "signaling"], |
| "handshake": ["handshake", "shaking hands", "greeting", "agreement", "meeting"], |
| |
| |
| |
| |
| "indoor": ["room", "bathroom", "kitchen", "office", "bedroom", "hallway", "corridor", "building", "interior", "inside", "lobby", "warehouse", "garage", "basement", "attic"], |
| "outdoor": ["street", "road", "field", "forest", "park", "garden", "sky", "mountain", "beach", "lake", "river", "highway", "parking", "yard", "sidewalk", "pathway"], |
| "room": ["room", "wall", "floor", "ceiling", "door", "window", "furniture", "light", "interior", "indoor"], |
| "bathroom": ["bathroom", "toilet", "sink", "shower", "mirror", "tile", "faucet", "bath"], |
| "kitchen": ["kitchen", "stove", "oven", "refrigerator", "counter", "cabinet", "cooking", "pot", "pan", "sink"], |
| "office": ["office", "desk", "computer", "monitor", "keyboard", "chair", "cubicle", "meeting room", "whiteboard"], |
| "bedroom": ["bedroom", "bed", "pillow", "blanket", "nightstand", "closet", "wardrobe", "mattress"], |
| "street": ["street", "road", "sidewalk", "pavement", "intersection", "crosswalk", "traffic", "lane", "highway"], |
| "forest": ["forest", "tree", "trees", "woods", "woodland", "jungle", "vegetation", "leaf", "branch", "nature"], |
| "beach": ["beach", "sand", "ocean", "sea", "wave", "shore", "coast", "surfing", "sunbathing"], |
| "parking": ["parking", "car park", "garage", "lot", "vehicle", "parked", "space"], |
| "construction": ["construction", "building site", "crane", "scaffold", "hard hat", "cement", "brick", "steel", "foundation"], |
| "hospital": ["hospital", "medical", "nurse", "doctor", "patient", "bed", "stretcher", "ambulance", "IV", "bandage"], |
| "school": ["school", "classroom", "student", "teacher", "desk", "board", "backpack", "book", "education"], |
| "store": ["store", "shop", "retail", "shelf", "product", "checkout", "counter", "customer", "aisle", "mall"], |
| "warehouse": ["warehouse", "storage", "boxes", "pallets", "shelving", "industrial", "loading dock", "forklift"], |
| |
| |
| |
| |
| "fire": ["fire", "flame", "smoke", "burning", "thermal", "heat", "blaze", "inferno", "ember", "ignite", "combustion", "wildfire"], |
| "smoke": ["smoke", "smoking", "fumes", "haze", "smog", "fog", "mist", "steam", "vapor"], |
| "explosion": ["explosion", "blast", "bomb", "detonation", "fireworks", "burst", "shockwave"], |
| "hot": ["fire", "flame", "heat", "thermal", "burning", "hot", "warm", "boiling", "steam"], |
| |
| |
| |
| |
| "weather": ["rain", "snow", "wind", "storm", "cloud", "sunny", "fog", "lightning", "thunder", "hail", "drizzle"], |
| "rain": ["rain", "raining", "wet", "puddle", "umbrella", "drizzle", "downpour", "storm"], |
| "snow": ["snow", "snowing", "ice", "icy", "frost", "frozen", "cold", "blizzard", "snowflake", "slippery"], |
| "wind": ["wind", "windy", "blowing", "gust", "breeze", "tornado", "hurricane", "storm"], |
| "night": ["dark", "night", "dim", "low light", "moonlight", "shadow", "darkness", "nighttime"], |
| "day": ["bright", "daylight", "sunny", "sunlight", "daytime", "clear", "morning", "afternoon"], |
| "flood": ["flood", "flooding", "water", "submerged", "rising water", "overflow", "dam", "rain"], |
| |
| |
| |
| |
| "vehicle": ["car", "truck", "bus", "motorcycle", "bicycle", "van", "vehicle", "suv", "taxi", "ambulance"], |
| "car": ["car", "sedan", "automobile", "vehicle", "driving", "parked", "suv", "hatchback"], |
| "truck": ["truck", "lorry", "semi", "trailer", "freight", "hauling", "delivery"], |
| "motorcycle": ["motorcycle", "motorbike", "scooter", "moped", "biker", "helmet", "two-wheeler"], |
| "bicycle": ["bicycle", "bike", "cycling", "cyclist", "pedal", "wheel", "handlebar"], |
| "bus": ["bus", "transit", "public transport", "shuttle", "coach", "school bus"], |
| "train": ["train", "railway", "railroad", "locomotive", "subway", "metro", "tram", "platform", "tracks"], |
| "airplane": ["airplane", "plane", "aircraft", "jet", "flying", "airport", "runway", "helicopter"], |
| "boat": ["boat", "ship", "vessel", "yacht", "canoe", "kayak", "ferry", "cruise", "sailing"], |
| "helicopter": ["helicopter", "chopper", "rotor", "hovering", "aerial", "helipad"], |
| |
| |
| |
| |
| "animal": ["dog", "cat", "bird", "horse", "animal", "cow", "sheep", "goat", "pig", "rabbit", "deer", "bear", "elephant", "lion", "tiger", "snake", "fish", "chicken", "duck", "monkey"], |
| "dog": ["dog", "puppy", "canine", "barking", "bark", "retriever", "shepherd", "bulldog", "poodle"], |
| "cat": ["cat", "kitten", "feline", "meowing", "purring", "tabby", "siamese"], |
| "bird": ["bird", "flying", "wings", "feathers", "chirping", "eagle", "hawk", "pigeon", "parrot", "crow", "sparrow", "owl"], |
| "horse": ["horse", "stallion", "mare", "pony", "galloping", "riding", "equine", "saddle"], |
| "pet": ["dog", "cat", "pet", "hamster", "rabbit", "fish", "parrot", "turtle", "guinea pig"], |
| "insect": ["insect", "bug", "ant", "bee", "spider", "fly", "mosquito", "butterfly", "moth", "beetle", "cockroach"], |
| "wildlife": ["deer", "bear", "wolf", "fox", "eagle", "snake", "lion", "tiger", "elephant", "monkey", "wild"], |
| |
| |
| |
| |
| "phone": ["phone", "cell phone", "mobile", "smartphone", "device", "screen", "calling", "texting"], |
| "computer": ["computer", "laptop", "monitor", "screen", "keyboard", "mouse", "desktop", "typing"], |
| "furniture": ["chair", "table", "desk", "couch", "sofa", "shelf", "cabinet", "bed", "drawer", "bookcase"], |
| "bag": ["bag", "backpack", "suitcase", "purse", "handbag", "luggage", "briefcase", "duffel"], |
| "bottle": ["bottle", "water bottle", "container", "jar", "glass", "cup", "mug", "flask"], |
| "book": ["book", "novel", "textbook", "notebook", "magazine", "newspaper", "journal", "document", "paper"], |
| "umbrella": ["umbrella", "parasol", "rain cover", "canopy"], |
| "clock": ["clock", "time", "watch", "timer", "alarm clock", "countdown"], |
| "key": ["key", "keychain", "lock", "padlock", "unlock", "access"], |
| "tool": ["tool", "hammer", "screwdriver", "wrench", "pliers", "drill", "saw", "equipment"], |
| "ball": ["ball", "soccer", "football", "basketball", "tennis", "baseball", "golf", "volleyball"], |
| "helmet": ["helmet", "hard hat", "safety helmet", "motorcycle helmet", "bike helmet", "head protection"], |
| "glasses": ["glasses", "eyeglasses", "sunglasses", "spectacles", "goggles", "lens"], |
| "hat": ["hat", "cap", "beanie", "hood", "visor", "turban", "headband", "headwear"], |
| |
| |
| |
| |
| "text": ["text", "sign", "letter", "word", "writing", "label", "ocr", "read", "printed", "typed", "handwritten", "inscription"], |
| "sign": ["sign", "text", "label", "warning", "notice", "banner", "writing", "poster", "billboard", "placard", "signage"], |
| "license": ["license plate", "number plate", "registration", "plate", "tag"], |
| "label": ["label", "tag", "sticker", "price", "brand", "product name", "barcode"], |
| "graffiti": ["graffiti", "spray paint", "vandalism", "art", "mural", "tags", "street art"], |
| "color": ["color", "colors", "red", "blue", "green", "yellow", "white", "black", "orange", "purple", "pink", "brown", "gray", "grey", "silver"], |
| "colors": ["color", "colors", "red", "blue", "green", "yellow", "white", "black", "orange", "purple", "pink", "brown", "gray", "grey", "silver"], |
| |
| |
| |
| |
| "red": ["red", "crimson", "scarlet", "ruby", "maroon", "burgundy", "vermillion", "reddish"], |
| "reddish": ["red", "reddish", "crimson", "scarlet", "ruby", "maroon"], |
| "blue": ["blue", "navy", "azure", "cobalt", "cyan", "teal", "sapphire", "indigo", "bluish"], |
| "bluish": ["blue", "bluish", "navy", "azure", "cobalt", "cyan", "teal"], |
| "green": ["green", "lime", "emerald", "olive", "sage", "mint", "forest green", "jade", "greenish"], |
| "greenish": ["green", "greenish", "lime", "emerald", "olive"], |
| "yellow": ["yellow", "gold", "golden", "amber", "lemon", "mustard", "canary", "yellowish"], |
| "yellowish": ["yellow", "yellowish", "gold", "golden", "amber"], |
| "black": ["black", "dark", "ebony", "onyx", "charcoal", "jet black", "blackish"], |
| "blackish": ["black", "blackish", "dark", "ebony", "charcoal"], |
| "white": ["white", "ivory", "cream", "snow", "pearl", "bright white", "whitish"], |
| "whitish": ["white", "whitish", "ivory", "cream"], |
| "orange": ["orange", "tangerine", "coral", "peach", "amber", "rust", "orangish"], |
| "orangish": ["orange", "orangish", "tangerine", "coral"], |
| "purple": ["purple", "violet", "lavender", "plum", "magenta", "mauve", "lilac", "purplish"], |
| "purplish": ["purple", "purplish", "violet", "lavender", "plum", "magenta"], |
| "pink": ["pink", "rose", "salmon", "fuchsia", "blush", "magenta", "hot pink", "pinkish"], |
| "pinkish": ["pink", "pinkish", "rose", "salmon", "fuchsia"], |
| "brown": ["brown", "tan", "beige", "chocolate", "khaki", "chestnut", "coffee", "walnut", "brownish"], |
| "brownish": ["brown", "brownish", "tan", "beige", "chocolate"], |
| "gray": ["gray", "grey", "silver", "charcoal", "slate", "ash", "pewter", "grayish", "greyish"], |
| "grey": ["gray", "grey", "silver", "charcoal", "slate", "ash", "pewter", "grayish", "greyish"], |
| "grayish": ["gray", "grey", "grayish", "greyish", "silver", "charcoal"], |
| "greyish": ["gray", "grey", "grayish", "greyish", "silver", "charcoal"], |
| "silver": ["silver", "metallic", "chrome", "steel", "aluminum", "shiny", "reflective"], |
| "gold": ["gold", "golden", "gilded", "brass", "amber"], |
| "golden": ["gold", "golden", "gilded", "brass", "amber"], |
| "bright": ["bright", "vivid", "vibrant", "neon", "fluorescent", "glowing", "luminous", "colorful"], |
| "dark": ["dark", "dim", "shadow", "black", "night", "low light", "murky", "gloomy"], |
| "colorful": ["colorful", "multicolor", "rainbow", "bright", "vivid", "vibrant", "varied colors"], |
| |
| |
| |
| |
| "depth": ["depth", "near", "far", "close", "distant", "range", "obstacle", "clearance", "distance", "proximity"], |
| "close": ["near", "close range", "close", "immediate", "proximity", "adjacent", "beside", "nearby"], |
| "far": ["far", "distant", "remote", "away", "long range", "horizon"], |
| "obstacle": ["wall", "door", "furniture", "chair", "table", "close range", "near", "barrier", "blocked", "obstruction", "fence", "gate"], |
| "distance": ["near", "far", "close", "distant", "range", "meters", "feet", "depth", "proximity"], |
| "height": ["tall", "high", "elevated", "above", "overhead", "ceiling", "tower", "roof", "floor"], |
| "crowded": ["crowded", "packed", "busy", "congested", "full", "dense", "many people", "occupied"], |
| "empty": ["empty", "vacant", "clear", "unoccupied", "deserted", "abandoned", "bare", "hollow"], |
| |
| |
| |
| |
| "noise": ["engine", "grinding", "mechanical", "hissing", "scraping", "clanking", "buzzing", "humming", "rattling", "banging", "crashing"], |
| "speech": ["speech", "talking", "speaking", "voice", "conversation", "dialogue", "words", "verbal", "announcement", "narration"], |
| "music": ["music", "singing", "song", "melody", "instrument", "playing", "beats", "rhythm", "piano", "guitar", "drum", "bass"], |
| "alarm": ["alarm", "siren", "beep", "alert", "warning", "horn", "buzzer", "ring"], |
| "scream": ["scream", "screaming", "shriek", "yell", "shout", "cry", "help", "distress"], |
| "gunshot": ["gunshot", "gunfire", "shooting", "bang", "shot", "firearm", "bullet"], |
| "engine": ["engine", "motor", "revving", "idling", "mechanical", "vehicle", "car engine", "machine"], |
| "glass": ["glass", "shatter", "breaking", "smash", "crack", "broken glass"], |
| "footsteps": ["footsteps", "walking", "running", "steps", "march", "stomp", "pacing"], |
| "barking": ["barking", "bark", "dog", "growl", "howl", "yelp", "whine"], |
| "crying": ["crying", "cry", "sobbing", "weeping", "whimpering", "wailing", "tears"], |
| "laughter": ["laughter", "laughing", "chuckle", "giggle", "funny", "comedy"], |
| "thunder": ["thunder", "lightning", "storm", "rumble", "boom"], |
| "knock": ["knock", "knocking", "door", "bang", "tap", "rapping"], |
| "horn": ["horn", "honking", "beep", "car horn", "truck horn", "signal"], |
| "whistle": ["whistle", "whistling", "wind", "referee", "train whistle", "signal"], |
| "clapping": ["clapping", "applause", "clap", "ovation", "cheering"], |
| "silence": ["quiet", "silent", "no sound", "calm", "peaceful", "still", "mute"], |
| "loud": ["loud", "noisy", "deafening", "blaring", "booming", "roaring", "thunderous"], |
| |
| |
| |
| |
| "security": ["guard", "camera", "surveillance", "monitor", "patrol", "fence", "gate", "badge", "uniform", "checkpoint", "authorized"], |
| "authorized": ["authorized", "identified", "recognized", "known", "verified", "approved", "cleared", "valid"], |
| "unauthorized": ["unauthorized", "unknown", "unrecognized", "stranger", "intruder", "trespassing", "invalid", "denied"], |
| "patrol": ["walking", "guard", "patrol", "route", "perimeter", "monitoring", "surveillance", "rounds"], |
| "trespassing": ["trespassing", "intruder", "unauthorized", "fence", "gate", "boundary", "restricted", "prohibited"], |
| "surveillance": ["camera", "monitor", "watching", "recording", "cctv", "footage", "surveillance", "tracking"], |
| "perimeter": ["fence", "wall", "gate", "boundary", "border", "barrier", "perimeter", "edge"], |
| |
| |
| |
| |
| "injury": ["blood", "wound", "cut", "bruise", "broken", "injured", "hurt", "bandage", "first aid", "trauma"], |
| "medical": ["hospital", "doctor", "nurse", "ambulance", "stretcher", "medicine", "pills", "injection", "stethoscope", "mask"], |
| "unconscious": ["unconscious", "fainted", "collapsed", "lying", "unresponsive", "still", "fallen", "motionless"], |
| "bleeding": ["blood", "bleeding", "wound", "cut", "injury", "red", "bandage"], |
| "fall": ["fall", "fallen", "collapsed", "on ground", "trip", "stumble", "slip", "lying down", "tumble"], |
| |
| |
| |
| |
| "clothing": ["shirt", "pants", "jacket", "coat", "dress", "skirt", "suit", "uniform", "shoes", "boots", "hat", "cap", "vest", "hoodie", "sweater"], |
| "shirt": ["shirt", "t-shirt", "top", "blouse", "polo", "jersey", "tank top"], |
| "pants": ["pants", "jeans", "trousers", "shorts", "leggings", "sweatpants"], |
| "jacket": ["jacket", "coat", "blazer", "hoodie", "sweater", "cardigan", "vest", "parka"], |
| "shoes": ["shoes", "boots", "sneakers", "sandals", "heels", "slippers", "footwear"], |
| "helmet": ["helmet", "hard hat", "safety helmet", "motorcycle helmet", "bike helmet"], |
| |
| |
| |
| |
| "food": ["food", "meal", "plate", "dish", "fruit", "vegetable", "meat", "bread", "rice", "pasta", "pizza", "burger", "sandwich", "snack", "dessert", "cake", "salad"], |
| "drink": ["drink", "water", "coffee", "tea", "juice", "soda", "beer", "wine", "cup", "glass", "bottle", "mug"], |
| |
| |
| |
| |
| "sports": ["ball", "soccer", "football", "basketball", "tennis", "baseball", "running", "swimming", "cycling", "boxing", "wrestling", "gym", "stadium", "field", "court"], |
| "swimming": ["swimming", "pool", "water", "diving", "swimmer", "stroke", "float", "splash"], |
| "boxing": ["boxing", "punching", "gloves", "ring", "fight", "knockout", "sparring"], |
| |
| |
| |
| |
| "screen": ["screen", "monitor", "display", "tv", "television", "phone", "tablet", "laptop", "computer"], |
| "camera": ["camera", "lens", "photo", "photography", "recording", "video", "flash", "tripod"], |
| "robot": ["robot", "drone", "machine", "automated", "mechanical", "ai", "sensor"], |
| "drone": ["drone", "quadcopter", "uav", "flying", "aerial", "remote control", "propeller"], |
| |
| |
| |
| |
| "metal": ["metal", "steel", "iron", "aluminum", "copper", "brass", "chrome", "metallic", "shiny"], |
| "wood": ["wood", "wooden", "timber", "plank", "board", "log", "oak", "pine", "mahogany"], |
| "glass": ["glass", "window", "transparent", "mirror", "reflection", "crystal", "pane"], |
| "fabric": ["fabric", "cloth", "textile", "cotton", "silk", "polyester", "linen", "wool", "leather"], |
| "concrete": ["concrete", "cement", "stone", "brick", "pavement", "asphalt", "gravel"], |
| "plastic": ["plastic", "polymer", "synthetic", "container", "wrap", "packaging"], |
| |
| |
| |
| |
| "light": ["light", "bright", "lamp", "bulb", "flashlight", "spotlight", "illuminated", "glowing", "lit"], |
| "shadow": ["shadow", "dark", "dim", "shade", "silhouette", "backlit", "contrast"], |
| "reflection": ["reflection", "mirror", "glass", "shiny", "glossy", "polished", "reflective"], |
| "fog": ["fog", "mist", "haze", "smog", "cloudy", "visibility", "obscured", "blurry"], |
| |
| |
| |
| |
| "many": ["many", "multiple", "several", "group", "crowd", "numerous", "various", "lots"], |
| "few": ["few", "couple", "pair", "some", "handful"], |
| "single": ["single", "one", "alone", "solo", "individual", "lone", "solitary"], |
| "none": ["none", "empty", "no", "zero", "absent", "missing", "not found", "not detected"], |
| } |
|
|
|
|
def caption_checkmate(self, mission_prompt: str, specialist_captions: List[Dict[str, str]]) -> Dict[str, Any]:
    """
    Smart Checkmate: verify mission objectives against specialist model captions.

    The prompt is split into individual objectives, every usable caption is
    scanned for each objective, and a per-objective checklist is returned
    together with an overall status.

    Args:
        mission_prompt: The user's original mission text.
        specialist_captions: Reports such as
            {"model": "color_expert", "caption": "Dominant colors: red (23%)"}.
            The text is read from "caption", falling back to "status" and then
            "explanation". Reports whose text contains "unavailable" are ignored.

    Returns:
        {
            "mission_status": "achieved" | "partially_achieved" | "searching",
            "score": float (0.0-1.0, rounded to 2 decimals),
            "objectives": [{"text": ..., "satisfied": bool,
                            "evidence": str | None, "matched_by": str | None}],
            "status_message": str,
            "satisfied": bool  # True when at least one objective matched
        }
        When the prompt or the caption list is empty, a "searching" result
        without the "satisfied" key is returned.
    """
    # Guard clause: nothing to verify until both a prompt and reports exist.
    if not (mission_prompt and specialist_captions):
        return {
            "mission_status": "searching",
            "score": 0.0,
            "objectives": [],
            "status_message": "Awaiting specialist reports..."
        }

    objectives = self._split_objectives(mission_prompt)
    logger.info(f"[SMART CHECKMATE] Parsed {len(objectives)} objectives from prompt: {objectives}")

    # Normalise the reports into lower-cased {"model", "text"} entries.
    usable_captions = []
    for report in specialist_captions:
        reporter = report.get("model", "unknown")
        body = report.get("caption") or report.get("status") or report.get("explanation") or ""
        if not body or "unavailable" in body.lower():
            continue
        usable_captions.append({"model": reporter, "text": body.lower()})

    # Build the checklist, one row per objective.
    checklist = []
    for objective in objectives:
        is_met, proof, source_model = self._match_objective(objective, usable_captions)
        checklist.append({
            "text": objective,
            "satisfied": is_met,
            "evidence": proof,
            "matched_by": source_model,
        })

    total = len(checklist)
    satisfied_count = len([row for row in checklist if row["satisfied"]])
    score = satisfied_count / total if total > 0 else 0.0

    if satisfied_count == 0:
        mission_status = "searching"
        status_message = f"Monitoring... 0/{total} objectives detected so far."
    elif satisfied_count == total:
        mission_status = "achieved"
        status_message = f"→ CHECKMATE: All {total} objectives verified by specialist reports."
    else:
        mission_status = "partially_achieved"
        status_message = f"Mission {int(score * 100)}% complete: {satisfied_count}/{total} objectives verified."

    logger.info(f"[SMART CHECKMATE] Result: {mission_status} ({satisfied_count}/{total})")
    return {
        "mission_status": mission_status,
        "score": round(score, 2),
        "objectives": checklist,
        "status_message": status_message,
        "satisfied": satisfied_count > 0
    }
|
|
| def _split_objectives(self, prompt: str) -> List[str]: |
| """Split a mission prompt into granular objectives.""" |
| |
| text = prompt.lower().strip().strip('"').strip("'") |
| |
| |
| for prefix in ["detect ", "find ", "look for ", "search for ", "monitor for ", "check for ", "identify "]: |
| if text.startswith(prefix): |
| text = text[len(prefix):] |
| break |
| |
| |
| parts = re.split(r'[,;]|\band\b|\bthen\b|\bafter\b', text) |
| |
| |
| |
| granular_parts = [] |
| for p in parts: |
| |
| sub = re.split(r'\bor\b|\bnear\b|\bwith\b|\bwho\b|\bwearing\b|\bat\b|\bbeside\b|\bholding\b|\bcarrying\b|\busing\b|\bshowing\b|\bwalking\b|\brunning\b', p) |
| granular_parts.extend(sub) |
|
|
| |
| final_parts = [] |
| |
| |
| COMMAND_VERBS = [ |
| "detect", "find", "look for", "search for", "monitor for", "check for", |
| "identify", "study", "watch", "observe", "scan", "report", "notify", |
| "is there", "is", "there", "was", "were", "any sign of", "presence of", "evidence of", "show me", |
| "also", "then", "please", "can you", "try to" |
| ] |
| |
| ARTICLES = ["a ", "an ", "the ", "any ", "some ", "all ", "every "] |
|
|
| for gp in granular_parts: |
| cleaned = gp.strip() |
| |
| |
| changed = True |
| while changed: |
| original = cleaned |
| |
| |
| for verb in COMMAND_VERBS: |
| if cleaned.startswith(verb + " "): |
| cleaned = cleaned[len(verb):].strip() |
| elif cleaned.startswith(verb + "s "): |
| cleaned = cleaned[len(verb)+1:].strip() |
| |
| |
| for article in ARTICLES: |
| if cleaned.startswith(article): |
| cleaned = cleaned[len(article):].strip() |
| |
| |
| if cleaned.startswith("sign of "): cleaned = cleaned[8:].strip() |
| if cleaned.startswith("signs of "): cleaned = cleaned[9:].strip() |
| |
| changed = (cleaned != original) |
|
|
| if cleaned and len(cleaned) > 2: |
| final_parts.append(cleaned) |
| |
| |
| unique = [] |
| seen = set() |
| for p in final_parts: |
| if p not in seen: |
| seen.add(p) |
| unique.append(p) |
| |
| return unique if unique else [text] |
|
|
def _match_objective(self, objective: str, caption_entries: List[Dict[str, str]]) -> tuple:
    """
    Match a single objective against all specialist captions.

    KEY LOGIC:
    1. Everything from "scene context:" onwards is cut from a caption (it is
       the base perception echo, not a specialist finding).
    2. A caption matching any "nothing found / not ready" pattern is skipped
       entirely.
    3. Every remaining target word of the objective must appear in the same
       caption, either literally or through one of its synonyms in
       ``self.concept_mirror``.

    Args:
        objective: One objective phrase, e.g. "red car".
        caption_entries: Entries shaped {"model": str, "text": str}.

    Returns:
        (matched, evidence, matched_by): on the first caption that satisfies
        the objective, ``(True, <first 120 chars of the caption>, <model>)``;
        otherwise ``(False, None, None)`` once every caption has been checked.
    """
    stop_words = {"a", "an", "the", "is", "are", "was", "were", "be", "been", "being",
                  "in", "on", "at", "to", "for", "of", "with", "by", "from", "it",
                  "if", "or", "not", "no", "what", "how", "sign", "signs", "near", "who", "which"}

    # A caption matching any of these reports a non-finding and is ignored.
    negative_patterns = (
        r"no target .* detected",
        r"no target .* match",
        r"no readable text found",
        r"no .* detected for mission",
        r"no .* found in frame",
        r"no .* match for mission",
        r"no significant .* detected",
        r"no clear .* detected",
        r"no human poses detected",
        r"no speech detected",
        r"no notable .* found",
        r"no .* identified",
        r"scanning.*specifically",
        r"searching for",
        r"model unavailable",
        r"unavailable",
        r"no significant findings",
        r"awaiting",
        r"processing",
    )

    # Per-sentence negation filter. The list is empty, so no sentence is
    # dropped at present; the hook is kept so words can be added later.
    negation_words = ()

    # Words that describe the act of monitoring rather than a target.
    utility_words = {"sign", "evidence", "monitor", "detect", "finding", "presence", "detection", "check"}

    meaningful_words = [
        w for w in objective.lower().split()
        if w not in stop_words and len(w) > 2
    ]

    # Prefer words outside CATEGORY_WORDS; when none exist, every meaningful
    # word is a category word, so fall back to the full list.
    target_words = [w for w in meaningful_words if w not in CATEGORY_WORDS] or meaningful_words
    checkable_words = [w for w in target_words if w not in utility_words]
    # NOTE(review): when checkable_words is empty the objective matches the
    # first usable caption. Kept as-is because filler fragments produced by
    # the prompt splitter would otherwise never be satisfiable — confirm intent.

    def mentions(word: str, context: str) -> bool:
        """True when the word, or one of its concept synonyms, occurs as a whole word."""
        if re.search(fr"\b{re.escape(word)}\b", context):
            return True
        if word not in self.concept_mirror:
            return False
        return any(
            re.search(fr"\b{re.escape(kw)}\b", context)
            for kw in self.concept_mirror[word]
        )

    for entry in caption_entries:
        raw_caption = entry["text"].lower()
        model_name = entry["model"]

        # Drop the echoed scene context so only specialist findings remain.
        findings = raw_caption
        scene_ctx_idx = findings.find("scene context:")
        if scene_ctx_idx != -1:
            findings = findings[:scene_ctx_idx].strip().rstrip(".")

        if len(findings) < 5:
            continue

        if any(re.search(pattern, findings) for pattern in negative_patterns):
            continue

        positive_sentences = []
        for sentence in re.split(r'[.!?]+', findings):
            sentence = sentence.strip()
            if not sentence:
                continue
            if any(re.search(fr"\b{re.escape(neg)}\b", sentence) for neg in negation_words):
                continue
            positive_sentences.append(sentence)

        # Separators become spaces so that "red/blue (23%)" still yields whole words.
        verified_context = " ".join(positive_sentences)
        verified_context = verified_context.replace("/", " ").replace("(", " ").replace(")", " ")

        if not verified_context:
            continue

        if all(mentions(word, verified_context) for word in checkable_words):
            return (True, raw_caption[:120], model_name)

    # Single exit for "no caption matched": every caption has been examined.
    return (False, None, None)
|
|
|
|
class BufferManager:
    """Keeps one observation buffer per session, created on first request."""

    def __init__(self, window_seconds: float = 10.0):
        # Window length handed to every buffer this manager creates.
        self.window_seconds = window_seconds
        # session_id -> buffer
        self.buffers: Dict[str, ObservationBuffer] = {}

    def get_buffer(self, session_id: str) -> ObservationBuffer:
        """Return the buffer for ``session_id``, creating it if it does not exist yet."""
        if session_id in self.buffers:
            return self.buffers[session_id]
        fresh = ObservationBuffer(window_seconds=self.window_seconds)
        self.buffers[session_id] = fresh
        return fresh

    def clear_session(self, session_id: str):
        """Forget the buffer of ``session_id``; unknown sessions are ignored."""
        self.buffers.pop(session_id, None)
|
|
| |
|
|
class MissionSupervisor:
    """
    Adaptive controller that monitors mission progress and manages retries.
    Implements the "Decision-Ready" logic engine.

    Per session it tracks how many adaptation attempts were made and which
    strategies were already proposed, so that each retry suggests something new.
    """
    MAX_ATTEMPTS = 3

    # failure type -> candidate (strategy name, expected success, cost) tuples.
    _STRATEGIES = {
        "low_light": (
            ("enable_thermal", 0.9, 4),
            ("increase_gain", 0.6, 1),
            ("lower_thresholds", 0.2, 0),
        ),
        "high_noise": (
            ("switch_to_visual_only", 0.7, 1),
            ("noise_gating", 0.5, 2),
        ),
        "low_confidence_match": (
            ("switch_to_specialist", 0.8, 2),
            ("zoom_in", 0.5, 1),
        ),
        "high_perception_uncertainty": (
            ("active_sensing_sweep", 0.8, 3),
            ("widen_fov", 0.4, 1),
        ),
        "high_anticipatory_risk": (
            ("increase_frame_rate", 0.7, 5),
            ("switch_to_specialist", 0.9, 2),
        ),
        "temporal_stagnation": (
            ("widen_fov", 0.4, 1),
            ("lower_semantic_thresholds", 0.3, 1),
        ),
        "target_not_found": (
            ("widen_fov", 0.5, 1),
            ("request_human_clarification", 1.0, 10),
        ),
    }
    # Used for any failure type without its own entry (e.g. "occlusion").
    _DEFAULT_CANDIDATES = (("lower_thresholds", 0.2, 0),)

    def __init__(self, evaluator: "MissionEvaluator"):
        self.evaluator = evaluator
        self.attempts: Dict[str, int] = {}       # session_id -> attempts used
        self.history: Dict[str, List[str]] = {}  # session_id -> strategies proposed

    @staticmethod
    def _as_dict(world_state: Any) -> Any:
        """Return ``world_state.to_dict()`` when available, else the object itself."""
        return world_state.to_dict() if hasattr(world_state, "to_dict") else world_state

    def evaluate_and_supervise(self, session_id: str, world_state: Any) -> Dict[str, Any]:
        """
        Stage 4 & 5 Combined:
        1. Evaluate mission (Stage 4) using the World State
        2. If failed, classify failure and adapt (Stage 5)

        Returns the evaluator's result dict, possibly extended with
        "early_warning", and on an unfinished mission either
        "retrying" + "next_strategy" + "attempt" or, once ``MAX_ATTEMPTS``
        is exhausted, status "failed".
        """
        result = self.evaluator.evaluate(world_state)

        # Early warning: any entity whose predicted risk exceeds 0.75.
        ws_dict = self._as_dict(world_state)
        high_risk_entities = [
            e for e in ws_dict.get("entities", [])
            if (e.get("prediction") or {}).get("risk_score", 0) > 0.75
        ]
        if high_risk_entities:
            result["early_warning"] = {
                "level": "critical",
                "message": f"Anticipatory threat forming! {len(high_risk_entities)} subjects showing risky behavior.",
                "details": [e["prediction"] for e in high_risk_entities]
            }

        # Mission finished or absent: reset the retry bookkeeping.
        if result["mission_status"] in ["achieved", "none"] or result["status_message"] == "No active mission.":
            self.attempts[session_id] = 0
            self.history[session_id] = []
            return result

        current_attempt = self.attempts.get(session_id, 0)

        if current_attempt < self.MAX_ATTEMPTS:
            failure_type = self._diagnose_failure(world_state)
            strategy = self._select_strategy(failure_type, self.history.get(session_id, []))

            self.attempts[session_id] = current_attempt + 1
            self.history.setdefault(session_id, []).append(strategy)

            result["mission_status"] = "retrying"
            result["next_strategy"] = strategy
            result["attempt"] = self.attempts[session_id]
            logger.info(f"[SUPERVISOR] Attempt {result['attempt']} failed. Diagnosis: {failure_type}. Strategy: {strategy}")
        else:
            result["mission_status"] = "failed"
            # Derived from MAX_ATTEMPTS so the message stays true if the limit changes.
            result["status_message"] = f"Mission failed after {self.MAX_ATTEMPTS} attempts. Target not found or unreachable."
            logger.warning(f"[SUPERVISOR] Mission FAILED for session {session_id}.")

        return result

    def _diagnose_failure(self, world_state: Any) -> str:
        """Situational Diagnostic Engine: Why is the mission stalling?

        Checks run in order: environment (lighting, noise), then the
        highest-confidence entity (uncertainty, risk, confidence, unchanged
        state over its last five history items), then occlusion. The first
        hit wins; "target_not_found" is the fallback.
        """
        ws_dict = self._as_dict(world_state)
        entities = ws_dict.get("entities", [])

        if ws_dict.get("lighting") == "low":
            return "low_light"
        if ws_dict.get("noise_level", 0.0) > 0.6:
            return "high_noise"

        if entities:
            best_e = max(entities, key=lambda e: e.get("confidence", 0))
            pred = best_e.get("prediction") or {}

            if pred.get("uncertainty", 0) > 0.6:
                return "high_perception_uncertainty"
            if pred.get("risk_score", 0) > 0.6:
                return "high_anticipatory_risk"

            if best_e.get("confidence", 0) < 0.4:
                return "low_confidence_match"

            # Stagnation: the last five recorded states are all identical.
            history = best_e.get("history", [])
            if len(history) > 5:
                states = [h.get("state") for h in history[-5:]]
                if all(s == states[0] for s in states):
                    return "temporal_stagnation"

        if ws_dict.get("target_status") == "occluded":
            return "occlusion"

        return "target_not_found"

    def _select_strategy(self, failure_type: str, history: List[str]) -> str:
        """ROI-driven strategy selection.

        Among the candidates for ``failure_type`` that are not yet in
        ``history``, returns the one with the highest
        ``success - 0.1 * cost`` (the earlier-listed candidate wins a tie).
        Returns "human_intervention_required" when every candidate was tried.
        """
        candidates = self._STRATEGIES.get(failure_type, self._DEFAULT_CANDIDATES)

        untried = [s for s in candidates if s[0] not in history]
        if not untried:
            return "human_intervention_required"

        # max() returns the first maximal item, like a stable descending sort.
        best = max(untried, key=lambda x: x[1] - (x[2] * 0.1))
        return best[0]
|
|
| |
# Module-level singletons. Each object is created exactly once; the previous
# second pair of assignments only replaced the first instances with new ones.
mission_evaluator = MissionEvaluator()
buffer_manager = BufferManager()
mission_supervisor = MissionSupervisor(mission_evaluator)
|
|