notRaphael
/

video-intelligence-platform

Model card Files and versions

xet

Community

notRaphael committed on Apr 26

Commit

6f0a386

verified ·

1 Parent(s): 89c35c3

fix: akinator.py - add null safety for caption access

Browse files

Files changed (1) hide show

video_intelligence/akinator.py +26 -26

video_intelligence/akinator.py CHANGED Viewed

@@ -37,10 +37,10 @@ class AkinatorNode:
 class AkinatorRefiner:
     """
     Interactive tree-based refinement of search results.
     Like Akinator: asks discriminative questions to narrow down
     which video moments the user is looking for.
     Algorithm:
     1. Start with all candidate results
     2. Extract attributes from each candidate (from detections + captions)
@@ -61,7 +61,7 @@ class AkinatorRefiner:
     def start(self, results: List[QueryResult], query: str) -> Dict:
         """
         Start the Akinator refinement process.
         Returns:
             {"status": "refining" | "done",
              "count": int,
@@ -85,11 +85,11 @@ class AkinatorRefiner:
     def answer(self, choice: str, query: str) -> Dict:
         """
         Process user's answer and narrow down results.
         Args:
             choice: User's selected option
             query: Original query for context
         Returns:
             Same format as start()
         """
@@ -178,7 +178,7 @@ class AkinatorRefiner:
         }
     def _extract_attributes(self, results: List[QueryResult],
-                              frame_ids: List[int]) -> Dict[str, List[str]]:
         """
         Extract splittable attributes from results.
         Combines detection labels + caption-derived attributes.
@@ -191,25 +191,25 @@ class AkinatorRefiner:
                 attributes["object_type"].add(det.lower())
             # From caption analysis
-            caption = result.caption.lower()
             # Location
             if "indoor" in caption or "inside" in caption or "room" in caption:
                 attributes["location"].add("indoor")
             if "outdoor" in caption or "outside" in caption or "street" in caption:
                 attributes["location"].add("outdoor")
             # Time of day
             if any(w in caption for w in ["night", "dark", "evening"]):
                 attributes["time_of_day"].add("night")
             if any(w in caption for w in ["day", "bright", "sunny", "morning", "afternoon"]):
                 attributes["time_of_day"].add("day")
             # Colors
             for color in ["red", "blue", "green", "white", "black", "yellow", "brown", "gray", "orange", "pink", "purple"]:
                 if color in caption:
                     attributes["dominant_color"].add(color)
             # People count
             if any(w in caption for w in ["crowd", "group", "many people", "several people"]):
                 attributes["people_density"].add("many")
@@ -217,7 +217,7 @@ class AkinatorRefiner:
                 attributes["people_density"].add("few")
             elif "empty" in caption or "no one" in caption:
                 attributes["people_density"].add("none")
             # Action
             for action in ["walking", "running", "sitting", "standing", "driving", "talking", "eating"]:
                 if action in caption:
@@ -231,7 +231,7 @@ class AkinatorRefiner:
         }
     def _find_best_split(self, results: List[QueryResult],
-                          attributes: Dict[str, List[str]]) -> Tuple[Optional[str], float]:
         """
         Find the attribute with highest information gain (like a decision tree).
         """
@@ -282,11 +282,11 @@ class AkinatorRefiner:
     def _get_attribute_value(self, result: QueryResult, attr_name: str) -> List[str]:
         """Get the value(s) of an attribute for a result."""
-        caption = result.caption.lower()
         if attr_name == "object_type":
             return [d.lower() for d in result.detections]
         elif attr_name == "location":
             values = []
             if any(w in caption for w in ["indoor", "inside", "room"]):
@@ -294,7 +294,7 @@ class AkinatorRefiner:
             if any(w in caption for w in ["outdoor", "outside", "street"]):
                 values.append("outdoor")
             return values
         elif attr_name == "time_of_day":
             values = []
             if any(w in caption for w in ["night", "dark", "evening"]):
@@ -302,34 +302,34 @@ class AkinatorRefiner:
             if any(w in caption for w in ["day", "bright", "sunny"]):
                 values.append("day")
             return values
         elif attr_name == "dominant_color":
-            return [c for c in ["red", "blue", "green", "white", "black", "yellow",
-                                 "brown", "gray", "orange", "pink", "purple"]
                     if c in caption]
         elif attr_name == "people_density":
             if any(w in caption for w in ["crowd", "group", "many"]):
                 return ["many"]
             elif any(w in caption for w in ["person", "man", "woman"]):
                 return ["few"]
             return ["none"]
         elif attr_name == "action":
-            return [a for a in ["walking", "running", "sitting", "standing",
                                 "driving", "talking", "eating"]
                     if a in caption]
         return []
     def _filter_by_choice(self, results: List[QueryResult],
-                           attribute: str, choice: str) -> List[QueryResult]:
         """Filter results that match the user's chosen attribute value."""
         filtered = []
         for r in results:
             values = self._get_attribute_value(r, attribute)
             if choice.lower() in [v.lower() for v in values]:
                 filtered.append(r)
         # If filtering removed everything (edge case), return all
         return filtered if filtered else results

 class AkinatorRefiner:
     """
     Interactive tree-based refinement of search results.
     Like Akinator: asks discriminative questions to narrow down
     which video moments the user is looking for.
     Algorithm:
     1. Start with all candidate results
     2. Extract attributes from each candidate (from detections + captions)
     def start(self, results: List[QueryResult], query: str) -> Dict:
         """
         Start the Akinator refinement process.
         Returns:
             {"status": "refining" | "done",
              "count": int,
     def answer(self, choice: str, query: str) -> Dict:
         """
         Process user's answer and narrow down results.
         Args:
             choice: User's selected option
             query: Original query for context
         Returns:
             Same format as start()
         """
         }
     def _extract_attributes(self, results: List[QueryResult],
+                            frame_ids: List[int]) -> Dict[str, List[str]]:
         """
         Extract splittable attributes from results.
         Combines detection labels + caption-derived attributes.
                 attributes["object_type"].add(det.lower())
             # From caption analysis
+            caption = result.caption.lower() if result.caption else ""
             # Location
             if "indoor" in caption or "inside" in caption or "room" in caption:
                 attributes["location"].add("indoor")
             if "outdoor" in caption or "outside" in caption or "street" in caption:
                 attributes["location"].add("outdoor")
             # Time of day
             if any(w in caption for w in ["night", "dark", "evening"]):
                 attributes["time_of_day"].add("night")
             if any(w in caption for w in ["day", "bright", "sunny", "morning", "afternoon"]):
                 attributes["time_of_day"].add("day")
             # Colors
             for color in ["red", "blue", "green", "white", "black", "yellow", "brown", "gray", "orange", "pink", "purple"]:
                 if color in caption:
                     attributes["dominant_color"].add(color)
             # People count
             if any(w in caption for w in ["crowd", "group", "many people", "several people"]):
                 attributes["people_density"].add("many")
                 attributes["people_density"].add("few")
             elif "empty" in caption or "no one" in caption:
                 attributes["people_density"].add("none")
             # Action
             for action in ["walking", "running", "sitting", "standing", "driving", "talking", "eating"]:
                 if action in caption:
         }
     def _find_best_split(self, results: List[QueryResult],
+                         attributes: Dict[str, List[str]]) -> Tuple[Optional[str], float]:
         """
         Find the attribute with highest information gain (like a decision tree).
         """
     def _get_attribute_value(self, result: QueryResult, attr_name: str) -> List[str]:
         """Get the value(s) of an attribute for a result."""
+        caption = result.caption.lower() if result.caption else ""
         if attr_name == "object_type":
             return [d.lower() for d in result.detections]
         elif attr_name == "location":
             values = []
             if any(w in caption for w in ["indoor", "inside", "room"]):
             if any(w in caption for w in ["outdoor", "outside", "street"]):
                 values.append("outdoor")
             return values
         elif attr_name == "time_of_day":
             values = []
             if any(w in caption for w in ["night", "dark", "evening"]):
             if any(w in caption for w in ["day", "bright", "sunny"]):
                 values.append("day")
             return values
         elif attr_name == "dominant_color":
+            return [c for c in ["red", "blue", "green", "white", "black", "yellow",
+                                "brown", "gray", "orange", "pink", "purple"]
                     if c in caption]
         elif attr_name == "people_density":
             if any(w in caption for w in ["crowd", "group", "many"]):
                 return ["many"]
             elif any(w in caption for w in ["person", "man", "woman"]):
                 return ["few"]
             return ["none"]
         elif attr_name == "action":
+            return [a for a in ["walking", "running", "sitting", "standing",
                                 "driving", "talking", "eating"]
                     if a in caption]
         return []
     def _filter_by_choice(self, results: List[QueryResult],
+                          attribute: str, choice: str) -> List[QueryResult]:
         """Filter results that match the user's chosen attribute value."""
         filtered = []
         for r in results:
             values = self._get_attribute_value(r, attribute)
             if choice.lower() in [v.lower() for v in values]:
                 filtered.append(r)
         # If filtering removed everything (edge case), return all
         return filtered if filtered else results