fix: akinator.py - add null safety for caption access
Browse files
- video_intelligence/akinator.py +26 -26
video_intelligence/akinator.py
CHANGED
|
@@ -37,10 +37,10 @@ class AkinatorNode:
|
|
| 37 |
class AkinatorRefiner:
|
| 38 |
"""
|
| 39 |
Interactive tree-based refinement of search results.
|
| 40 |
-
|
| 41 |
Like Akinator: asks discriminative questions to narrow down
|
| 42 |
which video moments the user is looking for.
|
| 43 |
-
|
| 44 |
Algorithm:
|
| 45 |
1. Start with all candidate results
|
| 46 |
2. Extract attributes from each candidate (from detections + captions)
|
|
@@ -61,7 +61,7 @@ class AkinatorRefiner:
|
|
| 61 |
def start(self, results: List[QueryResult], query: str) -> Dict:
|
| 62 |
"""
|
| 63 |
Start the Akinator refinement process.
|
| 64 |
-
|
| 65 |
Returns:
|
| 66 |
{"status": "refining" | "done",
|
| 67 |
"count": int,
|
|
@@ -85,11 +85,11 @@ class AkinatorRefiner:
|
|
| 85 |
def answer(self, choice: str, query: str) -> Dict:
|
| 86 |
"""
|
| 87 |
Process user's answer and narrow down results.
|
| 88 |
-
|
| 89 |
Args:
|
| 90 |
choice: User's selected option
|
| 91 |
query: Original query for context
|
| 92 |
-
|
| 93 |
Returns:
|
| 94 |
Same format as start()
|
| 95 |
"""
|
|
@@ -178,7 +178,7 @@ class AkinatorRefiner:
|
|
| 178 |
}
|
| 179 |
|
| 180 |
def _extract_attributes(self, results: List[QueryResult],
|
| 181 |
-
|
| 182 |
"""
|
| 183 |
Extract splittable attributes from results.
|
| 184 |
Combines detection labels + caption-derived attributes.
|
|
@@ -191,25 +191,25 @@ class AkinatorRefiner:
|
|
| 191 |
attributes["object_type"].add(det.lower())
|
| 192 |
|
| 193 |
# From caption analysis
|
| 194 |
-
caption = result.caption.lower()
|
| 195 |
-
|
| 196 |
# Location
|
| 197 |
if "indoor" in caption or "inside" in caption or "room" in caption:
|
| 198 |
attributes["location"].add("indoor")
|
| 199 |
if "outdoor" in caption or "outside" in caption or "street" in caption:
|
| 200 |
attributes["location"].add("outdoor")
|
| 201 |
-
|
| 202 |
# Time of day
|
| 203 |
if any(w in caption for w in ["night", "dark", "evening"]):
|
| 204 |
attributes["time_of_day"].add("night")
|
| 205 |
if any(w in caption for w in ["day", "bright", "sunny", "morning", "afternoon"]):
|
| 206 |
attributes["time_of_day"].add("day")
|
| 207 |
-
|
| 208 |
# Colors
|
| 209 |
for color in ["red", "blue", "green", "white", "black", "yellow", "brown", "gray", "orange", "pink", "purple"]:
|
| 210 |
if color in caption:
|
| 211 |
attributes["dominant_color"].add(color)
|
| 212 |
-
|
| 213 |
# People count
|
| 214 |
if any(w in caption for w in ["crowd", "group", "many people", "several people"]):
|
| 215 |
attributes["people_density"].add("many")
|
|
@@ -217,7 +217,7 @@ class AkinatorRefiner:
|
|
| 217 |
attributes["people_density"].add("few")
|
| 218 |
elif "empty" in caption or "no one" in caption:
|
| 219 |
attributes["people_density"].add("none")
|
| 220 |
-
|
| 221 |
# Action
|
| 222 |
for action in ["walking", "running", "sitting", "standing", "driving", "talking", "eating"]:
|
| 223 |
if action in caption:
|
|
@@ -231,7 +231,7 @@ class AkinatorRefiner:
|
|
| 231 |
}
|
| 232 |
|
| 233 |
def _find_best_split(self, results: List[QueryResult],
|
| 234 |
-
|
| 235 |
"""
|
| 236 |
Find the attribute with highest information gain (like a decision tree).
|
| 237 |
"""
|
|
@@ -282,11 +282,11 @@ class AkinatorRefiner:
|
|
| 282 |
|
| 283 |
def _get_attribute_value(self, result: QueryResult, attr_name: str) -> List[str]:
|
| 284 |
"""Get the value(s) of an attribute for a result."""
|
| 285 |
-
caption = result.caption.lower()
|
| 286 |
-
|
| 287 |
if attr_name == "object_type":
|
| 288 |
return [d.lower() for d in result.detections]
|
| 289 |
-
|
| 290 |
elif attr_name == "location":
|
| 291 |
values = []
|
| 292 |
if any(w in caption for w in ["indoor", "inside", "room"]):
|
|
@@ -294,7 +294,7 @@ class AkinatorRefiner:
|
|
| 294 |
if any(w in caption for w in ["outdoor", "outside", "street"]):
|
| 295 |
values.append("outdoor")
|
| 296 |
return values
|
| 297 |
-
|
| 298 |
elif attr_name == "time_of_day":
|
| 299 |
values = []
|
| 300 |
if any(w in caption for w in ["night", "dark", "evening"]):
|
|
@@ -302,34 +302,34 @@ class AkinatorRefiner:
|
|
| 302 |
if any(w in caption for w in ["day", "bright", "sunny"]):
|
| 303 |
values.append("day")
|
| 304 |
return values
|
| 305 |
-
|
| 306 |
elif attr_name == "dominant_color":
|
| 307 |
-
return [c for c in ["red", "blue", "green", "white", "black", "yellow",
|
| 308 |
-
|
| 309 |
if c in caption]
|
| 310 |
-
|
| 311 |
elif attr_name == "people_density":
|
| 312 |
if any(w in caption for w in ["crowd", "group", "many"]):
|
| 313 |
return ["many"]
|
| 314 |
elif any(w in caption for w in ["person", "man", "woman"]):
|
| 315 |
return ["few"]
|
| 316 |
return ["none"]
|
| 317 |
-
|
| 318 |
elif attr_name == "action":
|
| 319 |
-
return [a for a in ["walking", "running", "sitting", "standing",
|
| 320 |
"driving", "talking", "eating"]
|
| 321 |
if a in caption]
|
| 322 |
-
|
| 323 |
return []
|
| 324 |
|
| 325 |
def _filter_by_choice(self, results: List[QueryResult],
|
| 326 |
-
|
| 327 |
"""Filter results that match the user's chosen attribute value."""
|
| 328 |
filtered = []
|
| 329 |
for r in results:
|
| 330 |
values = self._get_attribute_value(r, attribute)
|
| 331 |
if choice.lower() in [v.lower() for v in values]:
|
| 332 |
filtered.append(r)
|
| 333 |
-
|
| 334 |
# If filtering removed everything (edge case), return all
|
| 335 |
return filtered if filtered else results
|
|
|
|
| 37 |
class AkinatorRefiner:
|
| 38 |
"""
|
| 39 |
Interactive tree-based refinement of search results.
|
| 40 |
+
|
| 41 |
Like Akinator: asks discriminative questions to narrow down
|
| 42 |
which video moments the user is looking for.
|
| 43 |
+
|
| 44 |
Algorithm:
|
| 45 |
1. Start with all candidate results
|
| 46 |
2. Extract attributes from each candidate (from detections + captions)
|
|
|
|
| 61 |
def start(self, results: List[QueryResult], query: str) -> Dict:
|
| 62 |
"""
|
| 63 |
Start the Akinator refinement process.
|
| 64 |
+
|
| 65 |
Returns:
|
| 66 |
{"status": "refining" | "done",
|
| 67 |
"count": int,
|
|
|
|
| 85 |
def answer(self, choice: str, query: str) -> Dict:
|
| 86 |
"""
|
| 87 |
Process user's answer and narrow down results.
|
| 88 |
+
|
| 89 |
Args:
|
| 90 |
choice: User's selected option
|
| 91 |
query: Original query for context
|
| 92 |
+
|
| 93 |
Returns:
|
| 94 |
Same format as start()
|
| 95 |
"""
|
|
|
|
| 178 |
}
|
| 179 |
|
| 180 |
def _extract_attributes(self, results: List[QueryResult],
|
| 181 |
+
frame_ids: List[int]) -> Dict[str, List[str]]:
|
| 182 |
"""
|
| 183 |
Extract splittable attributes from results.
|
| 184 |
Combines detection labels + caption-derived attributes.
|
|
|
|
| 191 |
attributes["object_type"].add(det.lower())
|
| 192 |
|
| 193 |
# From caption analysis
|
| 194 |
+
caption = result.caption.lower() if result.caption else ""
|
| 195 |
+
|
| 196 |
# Location
|
| 197 |
if "indoor" in caption or "inside" in caption or "room" in caption:
|
| 198 |
attributes["location"].add("indoor")
|
| 199 |
if "outdoor" in caption or "outside" in caption or "street" in caption:
|
| 200 |
attributes["location"].add("outdoor")
|
| 201 |
+
|
| 202 |
# Time of day
|
| 203 |
if any(w in caption for w in ["night", "dark", "evening"]):
|
| 204 |
attributes["time_of_day"].add("night")
|
| 205 |
if any(w in caption for w in ["day", "bright", "sunny", "morning", "afternoon"]):
|
| 206 |
attributes["time_of_day"].add("day")
|
| 207 |
+
|
| 208 |
# Colors
|
| 209 |
for color in ["red", "blue", "green", "white", "black", "yellow", "brown", "gray", "orange", "pink", "purple"]:
|
| 210 |
if color in caption:
|
| 211 |
attributes["dominant_color"].add(color)
|
| 212 |
+
|
| 213 |
# People count
|
| 214 |
if any(w in caption for w in ["crowd", "group", "many people", "several people"]):
|
| 215 |
attributes["people_density"].add("many")
|
|
|
|
| 217 |
attributes["people_density"].add("few")
|
| 218 |
elif "empty" in caption or "no one" in caption:
|
| 219 |
attributes["people_density"].add("none")
|
| 220 |
+
|
| 221 |
# Action
|
| 222 |
for action in ["walking", "running", "sitting", "standing", "driving", "talking", "eating"]:
|
| 223 |
if action in caption:
|
|
|
|
| 231 |
}
|
| 232 |
|
| 233 |
def _find_best_split(self, results: List[QueryResult],
|
| 234 |
+
attributes: Dict[str, List[str]]) -> Tuple[Optional[str], float]:
|
| 235 |
"""
|
| 236 |
Find the attribute with highest information gain (like a decision tree).
|
| 237 |
"""
|
|
|
|
| 282 |
|
| 283 |
def _get_attribute_value(self, result: QueryResult, attr_name: str) -> List[str]:
|
| 284 |
"""Get the value(s) of an attribute for a result."""
|
| 285 |
+
caption = result.caption.lower() if result.caption else ""
|
| 286 |
+
|
| 287 |
if attr_name == "object_type":
|
| 288 |
return [d.lower() for d in result.detections]
|
| 289 |
+
|
| 290 |
elif attr_name == "location":
|
| 291 |
values = []
|
| 292 |
if any(w in caption for w in ["indoor", "inside", "room"]):
|
|
|
|
| 294 |
if any(w in caption for w in ["outdoor", "outside", "street"]):
|
| 295 |
values.append("outdoor")
|
| 296 |
return values
|
| 297 |
+
|
| 298 |
elif attr_name == "time_of_day":
|
| 299 |
values = []
|
| 300 |
if any(w in caption for w in ["night", "dark", "evening"]):
|
|
|
|
| 302 |
if any(w in caption for w in ["day", "bright", "sunny"]):
|
| 303 |
values.append("day")
|
| 304 |
return values
|
| 305 |
+
|
| 306 |
elif attr_name == "dominant_color":
|
| 307 |
+
return [c for c in ["red", "blue", "green", "white", "black", "yellow",
|
| 308 |
+
"brown", "gray", "orange", "pink", "purple"]
|
| 309 |
if c in caption]
|
| 310 |
+
|
| 311 |
elif attr_name == "people_density":
|
| 312 |
if any(w in caption for w in ["crowd", "group", "many"]):
|
| 313 |
return ["many"]
|
| 314 |
elif any(w in caption for w in ["person", "man", "woman"]):
|
| 315 |
return ["few"]
|
| 316 |
return ["none"]
|
| 317 |
+
|
| 318 |
elif attr_name == "action":
|
| 319 |
+
return [a for a in ["walking", "running", "sitting", "standing",
|
| 320 |
"driving", "talking", "eating"]
|
| 321 |
if a in caption]
|
| 322 |
+
|
| 323 |
return []
|
| 324 |
|
| 325 |
def _filter_by_choice(self, results: List[QueryResult],
|
| 326 |
+
attribute: str, choice: str) -> List[QueryResult]:
|
| 327 |
"""Filter results that match the user's chosen attribute value."""
|
| 328 |
filtered = []
|
| 329 |
for r in results:
|
| 330 |
values = self._get_attribute_value(r, attribute)
|
| 331 |
if choice.lower() in [v.lower() for v in values]:
|
| 332 |
filtered.append(r)
|
| 333 |
+
|
| 334 |
# If filtering removed everything (edge case), return all
|
| 335 |
return filtered if filtered else results
|