notRaphael committed on
Commit
6f0a386
·
verified ·
1 Parent(s): 89c35c3

fix: akinator.py - add null safety for caption access

Browse files
Files changed (1) hide show
  1. video_intelligence/akinator.py +26 -26
video_intelligence/akinator.py CHANGED
@@ -37,10 +37,10 @@ class AkinatorNode:
37
  class AkinatorRefiner:
38
  """
39
  Interactive tree-based refinement of search results.
40
-
41
  Like Akinator: asks discriminative questions to narrow down
42
  which video moments the user is looking for.
43
-
44
  Algorithm:
45
  1. Start with all candidate results
46
  2. Extract attributes from each candidate (from detections + captions)
@@ -61,7 +61,7 @@ class AkinatorRefiner:
61
  def start(self, results: List[QueryResult], query: str) -> Dict:
62
  """
63
  Start the Akinator refinement process.
64
-
65
  Returns:
66
  {"status": "refining" | "done",
67
  "count": int,
@@ -85,11 +85,11 @@ class AkinatorRefiner:
85
  def answer(self, choice: str, query: str) -> Dict:
86
  """
87
  Process user's answer and narrow down results.
88
-
89
  Args:
90
  choice: User's selected option
91
  query: Original query for context
92
-
93
  Returns:
94
  Same format as start()
95
  """
@@ -178,7 +178,7 @@ class AkinatorRefiner:
178
  }
179
 
180
  def _extract_attributes(self, results: List[QueryResult],
181
- frame_ids: List[int]) -> Dict[str, List[str]]:
182
  """
183
  Extract splittable attributes from results.
184
  Combines detection labels + caption-derived attributes.
@@ -191,25 +191,25 @@ class AkinatorRefiner:
191
  attributes["object_type"].add(det.lower())
192
 
193
  # From caption analysis
194
- caption = result.caption.lower()
195
-
196
  # Location
197
  if "indoor" in caption or "inside" in caption or "room" in caption:
198
  attributes["location"].add("indoor")
199
  if "outdoor" in caption or "outside" in caption or "street" in caption:
200
  attributes["location"].add("outdoor")
201
-
202
  # Time of day
203
  if any(w in caption for w in ["night", "dark", "evening"]):
204
  attributes["time_of_day"].add("night")
205
  if any(w in caption for w in ["day", "bright", "sunny", "morning", "afternoon"]):
206
  attributes["time_of_day"].add("day")
207
-
208
  # Colors
209
  for color in ["red", "blue", "green", "white", "black", "yellow", "brown", "gray", "orange", "pink", "purple"]:
210
  if color in caption:
211
  attributes["dominant_color"].add(color)
212
-
213
  # People count
214
  if any(w in caption for w in ["crowd", "group", "many people", "several people"]):
215
  attributes["people_density"].add("many")
@@ -217,7 +217,7 @@ class AkinatorRefiner:
217
  attributes["people_density"].add("few")
218
  elif "empty" in caption or "no one" in caption:
219
  attributes["people_density"].add("none")
220
-
221
  # Action
222
  for action in ["walking", "running", "sitting", "standing", "driving", "talking", "eating"]:
223
  if action in caption:
@@ -231,7 +231,7 @@ class AkinatorRefiner:
231
  }
232
 
233
  def _find_best_split(self, results: List[QueryResult],
234
- attributes: Dict[str, List[str]]) -> Tuple[Optional[str], float]:
235
  """
236
  Find the attribute with highest information gain (like a decision tree).
237
  """
@@ -282,11 +282,11 @@ class AkinatorRefiner:
282
 
283
  def _get_attribute_value(self, result: QueryResult, attr_name: str) -> List[str]:
284
  """Get the value(s) of an attribute for a result."""
285
- caption = result.caption.lower()
286
-
287
  if attr_name == "object_type":
288
  return [d.lower() for d in result.detections]
289
-
290
  elif attr_name == "location":
291
  values = []
292
  if any(w in caption for w in ["indoor", "inside", "room"]):
@@ -294,7 +294,7 @@ class AkinatorRefiner:
294
  if any(w in caption for w in ["outdoor", "outside", "street"]):
295
  values.append("outdoor")
296
  return values
297
-
298
  elif attr_name == "time_of_day":
299
  values = []
300
  if any(w in caption for w in ["night", "dark", "evening"]):
@@ -302,34 +302,34 @@ class AkinatorRefiner:
302
  if any(w in caption for w in ["day", "bright", "sunny"]):
303
  values.append("day")
304
  return values
305
-
306
  elif attr_name == "dominant_color":
307
- return [c for c in ["red", "blue", "green", "white", "black", "yellow",
308
- "brown", "gray", "orange", "pink", "purple"]
309
  if c in caption]
310
-
311
  elif attr_name == "people_density":
312
  if any(w in caption for w in ["crowd", "group", "many"]):
313
  return ["many"]
314
  elif any(w in caption for w in ["person", "man", "woman"]):
315
  return ["few"]
316
  return ["none"]
317
-
318
  elif attr_name == "action":
319
- return [a for a in ["walking", "running", "sitting", "standing",
320
  "driving", "talking", "eating"]
321
  if a in caption]
322
-
323
  return []
324
 
325
  def _filter_by_choice(self, results: List[QueryResult],
326
- attribute: str, choice: str) -> List[QueryResult]:
327
  """Filter results that match the user's chosen attribute value."""
328
  filtered = []
329
  for r in results:
330
  values = self._get_attribute_value(r, attribute)
331
  if choice.lower() in [v.lower() for v in values]:
332
  filtered.append(r)
333
-
334
  # If filtering removed everything (edge case), return all
335
  return filtered if filtered else results
 
37
  class AkinatorRefiner:
38
  """
39
  Interactive tree-based refinement of search results.
40
+
41
  Like Akinator: asks discriminative questions to narrow down
42
  which video moments the user is looking for.
43
+
44
  Algorithm:
45
  1. Start with all candidate results
46
  2. Extract attributes from each candidate (from detections + captions)
 
61
  def start(self, results: List[QueryResult], query: str) -> Dict:
62
  """
63
  Start the Akinator refinement process.
64
+
65
  Returns:
66
  {"status": "refining" | "done",
67
  "count": int,
 
85
  def answer(self, choice: str, query: str) -> Dict:
86
  """
87
  Process user's answer and narrow down results.
88
+
89
  Args:
90
  choice: User's selected option
91
  query: Original query for context
92
+
93
  Returns:
94
  Same format as start()
95
  """
 
178
  }
179
 
180
  def _extract_attributes(self, results: List[QueryResult],
181
+ frame_ids: List[int]) -> Dict[str, List[str]]:
182
  """
183
  Extract splittable attributes from results.
184
  Combines detection labels + caption-derived attributes.
 
191
  attributes["object_type"].add(det.lower())
192
 
193
  # From caption analysis
194
+ caption = result.caption.lower() if result.caption else ""
195
+
196
  # Location
197
  if "indoor" in caption or "inside" in caption or "room" in caption:
198
  attributes["location"].add("indoor")
199
  if "outdoor" in caption or "outside" in caption or "street" in caption:
200
  attributes["location"].add("outdoor")
201
+
202
  # Time of day
203
  if any(w in caption for w in ["night", "dark", "evening"]):
204
  attributes["time_of_day"].add("night")
205
  if any(w in caption for w in ["day", "bright", "sunny", "morning", "afternoon"]):
206
  attributes["time_of_day"].add("day")
207
+
208
  # Colors
209
  for color in ["red", "blue", "green", "white", "black", "yellow", "brown", "gray", "orange", "pink", "purple"]:
210
  if color in caption:
211
  attributes["dominant_color"].add(color)
212
+
213
  # People count
214
  if any(w in caption for w in ["crowd", "group", "many people", "several people"]):
215
  attributes["people_density"].add("many")
 
217
  attributes["people_density"].add("few")
218
  elif "empty" in caption or "no one" in caption:
219
  attributes["people_density"].add("none")
220
+
221
  # Action
222
  for action in ["walking", "running", "sitting", "standing", "driving", "talking", "eating"]:
223
  if action in caption:
 
231
  }
232
 
233
  def _find_best_split(self, results: List[QueryResult],
234
+ attributes: Dict[str, List[str]]) -> Tuple[Optional[str], float]:
235
  """
236
  Find the attribute with highest information gain (like a decision tree).
237
  """
 
282
 
283
  def _get_attribute_value(self, result: QueryResult, attr_name: str) -> List[str]:
284
  """Get the value(s) of an attribute for a result."""
285
+ caption = result.caption.lower() if result.caption else ""
286
+
287
  if attr_name == "object_type":
288
  return [d.lower() for d in result.detections]
289
+
290
  elif attr_name == "location":
291
  values = []
292
  if any(w in caption for w in ["indoor", "inside", "room"]):
 
294
  if any(w in caption for w in ["outdoor", "outside", "street"]):
295
  values.append("outdoor")
296
  return values
297
+
298
  elif attr_name == "time_of_day":
299
  values = []
300
  if any(w in caption for w in ["night", "dark", "evening"]):
 
302
  if any(w in caption for w in ["day", "bright", "sunny"]):
303
  values.append("day")
304
  return values
305
+
306
  elif attr_name == "dominant_color":
307
+ return [c for c in ["red", "blue", "green", "white", "black", "yellow",
308
+ "brown", "gray", "orange", "pink", "purple"]
309
  if c in caption]
310
+
311
  elif attr_name == "people_density":
312
  if any(w in caption for w in ["crowd", "group", "many"]):
313
  return ["many"]
314
  elif any(w in caption for w in ["person", "man", "woman"]):
315
  return ["few"]
316
  return ["none"]
317
+
318
  elif attr_name == "action":
319
+ return [a for a in ["walking", "running", "sitting", "standing",
320
  "driving", "talking", "eating"]
321
  if a in caption]
322
+
323
  return []
324
 
325
  def _filter_by_choice(self, results: List[QueryResult],
326
+ attribute: str, choice: str) -> List[QueryResult]:
327
  """Filter results that match the user's chosen attribute value."""
328
  filtered = []
329
  for r in results:
330
  values = self._get_attribute_value(r, attribute)
331
  if choice.lower() in [v.lower() for v in values]:
332
  filtered.append(r)
333
+
334
  # If filtering removed everything (edge case), return all
335
  return filtered if filtered else results