CB committed on
Commit
a43f860
·
verified ·
1 Parent(s): 0b25ca5

Update streamlit_app.py

Browse files
Files changed (1) hide show
  1. streamlit_app.py +48 -95
streamlit_app.py CHANGED
@@ -15,7 +15,6 @@ from dotenv import load_dotenv
15
 
16
  load_dotenv()
17
 
18
- # Optional phi/GenAI imports — gracefully degrade if not present
19
  try:
20
  from phi.agent import Agent
21
  from phi.model.google import Gemini
@@ -38,7 +37,6 @@ st.set_page_config(page_title="Generate the story of videos", layout="wide")
38
  DATA_DIR = Path("./data")
39
  DATA_DIR.mkdir(exist_ok=True)
40
 
41
- # Session defaults
42
  st.session_state.setdefault("videos", "")
43
  st.session_state.setdefault("loop_video", False)
44
  st.session_state.setdefault("uploaded_file", None)
@@ -51,12 +49,10 @@ st.session_state.setdefault("file_hash", None)
51
  st.session_state.setdefault("fast_mode", False)
52
  st.session_state.setdefault("api_key", os.getenv("GOOGLE_API_KEY", ""))
53
  st.session_state.setdefault("last_model", "")
54
- st.session_state.setdefault("upload_progress", {"uploaded": 0, "total": 0})
55
  st.session_state.setdefault("last_url_value", "")
56
 
57
  def sanitize_filename(path_str: str):
58
- name = Path(path_str).name
59
- return name.lower().translate(str.maketrans("", "", string.punctuation)).replace(" ", "_")
60
 
61
  def file_sha256(path: str, block_size: int = 65536) -> str:
62
  h = hashlib.sha256()
@@ -107,27 +103,24 @@ def file_name_or_id(file_obj):
107
  return None
108
  if isinstance(file_obj, dict):
109
  return file_obj.get("name") or file_obj.get("id")
110
- # object-like
111
- for attr in ("name", "id", "fileId", "file_id", "file_id"):
112
  if hasattr(file_obj, attr):
113
  val = getattr(file_obj, attr)
114
  if val:
115
  return val
116
- # fallback to string
117
  return str(file_obj)
118
 
119
  def get_effective_api_key():
120
  return st.session_state.get("api_key") or os.getenv("GOOGLE_API_KEY")
121
 
122
- def configure_genai_if_needed():
123
- key = get_effective_api_key()
124
- if not key:
125
  return False
126
  try:
127
  genai.configure(api_key=key)
 
128
  except Exception:
129
- pass
130
- return True
131
 
132
  _agent = None
133
  def maybe_create_agent(model_id: str):
@@ -160,7 +153,6 @@ def clear_all_video_state():
160
  except Exception:
161
  pass
162
 
163
- # track url changes
164
  current_url = st.session_state.get("url", "")
165
  if current_url != st.session_state.get("last_url_value"):
166
  clear_all_video_state()
@@ -170,16 +162,15 @@ st.sidebar.header("Video Input")
170
  st.sidebar.text_input("Video URL", key="url", placeholder="https://")
171
 
172
  settings_exp = st.sidebar.expander("Settings", expanded=False)
173
- model_input = settings_exp.text_input("Gemini Model (short name)", "gemini-2.5-flash-lite", key="model_input")
174
  settings_exp.text_input("Google API Key", key="api_key", value=os.getenv("GOOGLE_API_KEY", ""), type="password")
175
  default_prompt = (
176
  "Watch the video and provide a detailed behavioral report focusing on human actions, interactions, posture, movement, and apparent intent. Keep language professional. Include a list of observations for notable events."
177
  )
178
- analysis_prompt = settings_exp.text_area("Enter analysis", value=default_prompt, height=140)
179
  settings_exp.text_input("Video Password (if needed)", key="video-password", placeholder="password", type="password")
180
  settings_exp.checkbox("Fast mode (skip compression, smaller model, fewer tokens)", key="fast_mode")
181
 
182
- # Show which key is active
183
  key_source = "session" if st.session_state.get("api_key") else ".env" if os.getenv("GOOGLE_API_KEY") else "none"
184
  settings_exp.caption(f"Using API key from: **{key_source}**")
185
 
@@ -214,7 +205,6 @@ def wait_for_processed(file_obj, timeout=180):
214
  try:
215
  obj = get_file(name)
216
  except Exception:
217
- # if the SDK fails, return original object
218
  return file_obj
219
  state = getattr(obj, "state", None)
220
  if not state or getattr(state, "name", None) != "PROCESSING":
@@ -295,7 +285,6 @@ if st.session_state["videos"]:
295
  except Exception:
296
  pass
297
 
298
- # --- Generation flow ---
299
  if generate_now and not st.session_state.get("busy"):
300
  if not st.session_state.get("videos"):
301
  st.error("No video loaded. Use 'Load Video' in the sidebar.")
@@ -306,11 +295,7 @@ if generate_now and not st.session_state.get("busy"):
306
  else:
307
  try:
308
  st.session_state["busy"] = True
309
- try:
310
- if HAS_GENAI and genai is not None:
311
- genai.configure(api_key=key_to_use)
312
- except Exception:
313
- pass
314
 
315
  model_id = (st.session_state.get("model_input") or "gemini-2.5-flash-lite").strip()
316
  if st.session_state.get("last_model") != model_id:
@@ -355,59 +340,35 @@ if generate_now and not st.session_state.get("busy"):
355
  st.session_state["last_loaded_path"] = current_path
356
  st.session_state["file_hash"] = current_hash
357
 
358
- prompt_text = (analysis_prompt.strip() or default_prompt).strip()
359
 
360
  out = ""
361
- if st.session_state.get("fast_mode"):
362
- model_used = model_id if model_id else "gemini-2.5-flash-lite"
363
- max_tokens = 512
364
- else:
365
- model_used = model_id
366
- max_tokens = 1024
367
-
368
  est_tokens = max_tokens
369
- est_cost_caption = f"Est. max tokens: {est_tokens}"
370
 
371
- # First try Agent, but guard and FALLBACK to direct genai responses if Agent fails or returns empty.
372
  agent = maybe_create_agent(model_used)
373
- debug_info = {"agent_attempted": False, "agent_ok": False, "agent_error": None, "agent_response_has_text": False}
374
  if agent:
375
  debug_info["agent_attempted"] = True
376
  try:
377
- with st.spinner("Generating description via Agent..."):
378
  if not processed:
379
  raise RuntimeError("Processed file missing for agent generation")
380
- # call agent.run inside try/except to catch library IndexError
381
  agent_response = agent.run(prompt_text, videos=[processed], safety_settings=safety_settings)
382
- # Try to extract text from common attributes; be defensive
383
- agent_text = getattr(agent_response, "content", None) or getattr(agent_response, "outputText", None) or None
384
- if not agent_text:
385
- # try dict-like access
386
- try:
387
- if isinstance(agent_response, dict):
388
- # check common keys
389
- for k in ("content", "outputText", "text"):
390
- if k in agent_response and agent_response[k]:
391
- agent_text = agent_response[k]
392
- break
393
- except Exception:
394
- pass
395
  if agent_text and str(agent_text).strip():
396
  out = str(agent_text).strip()
397
  debug_info["agent_ok"] = True
398
- debug_info["agent_response_has_text"] = True
399
- else:
400
- # Agent returned but had no usable text; set a marker to fallback
401
- debug_info["agent_ok"] = False
402
  except Exception as ae:
403
- # Save agent error and continue to fallback path instead of crashing
404
- debug_info["agent_error"] = f"{ae}"
405
- # include traceback for debugging
406
- debug_info["agent_traceback"] = traceback.format_exc()
407
- # Do not re-raise; we'll fallback to genai.responses.generate below
408
 
409
  if not out:
410
- # Fallback to direct Responses API flow (robust multi-version support)
411
  try:
412
  if not HAS_GENAI or genai is None:
413
  raise RuntimeError("Responses API not available; install google.generativeai SDK.")
@@ -415,12 +376,13 @@ if generate_now and not st.session_state.get("busy"):
415
  fname = file_name_or_id(processed)
416
  if not fname:
417
  raise RuntimeError("Uploaded file missing name/id")
418
-
419
  system_msg = {"role": "system", "content": prompt_text}
420
  user_msg = {"role": "user", "content": "Please summarize the attached video."}
421
-
422
  response = None
423
- # Try 1: new-style responses API (genai.responses.generate)
 
424
  try:
425
  if hasattr(genai, "responses") and hasattr(genai.responses, "generate"):
426
  response = genai.responses.generate(
@@ -432,8 +394,8 @@ if generate_now and not st.session_state.get("busy"):
432
  )
433
  except Exception:
434
  response = None
435
-
436
- # Try 2: model-based interface (GenerativeModel / model.generate_content)
437
  if response is None:
438
  try:
439
  if hasattr(genai, "GenerativeModel"):
@@ -444,8 +406,8 @@ if generate_now and not st.session_state.get("busy"):
444
  response = model_obj.generate([system_msg, user_msg], files=[{"name": fname}], max_output_tokens=max_tokens)
445
  except Exception:
446
  response = None
447
-
448
- # Try 3: generic genai.generate / genai.create
449
  if response is None:
450
  try:
451
  if hasattr(genai, "generate"):
@@ -454,15 +416,12 @@ if generate_now and not st.session_state.get("busy"):
454
  response = genai.create(model=model_used, input=[{"text": prompt_text, "files": [{"name": fname}]}], max_output_tokens=max_tokens)
455
  except Exception:
456
  response = None
457
-
458
  if response is None:
459
- raise RuntimeError("No supported generate method found on google.generativeai; check SDK version.")
460
-
461
- # Defensive normalization of response -> outputs list
462
  outputs = []
463
- if response is None:
464
- outputs = []
465
- elif isinstance(response, dict):
466
  for key in ("output", "candidates", "items", "responses"):
467
  val = response.get(key)
468
  if isinstance(val, list) and val:
@@ -479,34 +438,31 @@ if generate_now and not st.session_state.get("busy"):
479
  if isinstance(val, list) and val:
480
  outputs = val
481
  break
482
-
483
  if not isinstance(outputs, list):
484
  outputs = list(outputs) if outputs else []
485
-
486
- # extract text pieces safely
487
  text_pieces = []
488
  for item in outputs:
489
  if item is None:
490
  continue
491
- cand_contents = None
492
  if isinstance(item, dict):
493
  for k in ("content", "text", "message", "output_text", "output"):
494
  if k in item and item[k]:
495
- cand_contents = item[k]
496
  break
497
  else:
498
  for k in ("content", "text", "message", "output", "output_text"):
499
- cand_contents = getattr(item, k, None)
500
- if cand_contents:
501
  break
502
-
503
- if isinstance(cand_contents, str):
504
- if cand_contents.strip():
505
- text_pieces.append(cand_contents.strip())
506
  continue
507
-
508
- if isinstance(cand_contents, (list, tuple)):
509
- for c in cand_contents:
510
  if c is None:
511
  continue
512
  if isinstance(c, str):
@@ -520,7 +476,6 @@ if generate_now and not st.session_state.get("busy"):
520
  if t:
521
  text_pieces.append(str(t).strip())
522
  continue
523
-
524
  direct = None
525
  if isinstance(item, dict):
526
  direct = item.get("text") or item.get("output_text") or item.get("message")
@@ -528,7 +483,7 @@ if generate_now and not st.session_state.get("busy"):
528
  direct = getattr(item, "text", None) or getattr(item, "output_text", None) or getattr(item, "message", None)
529
  if direct:
530
  text_pieces.append(str(direct).strip())
531
-
532
  if not text_pieces:
533
  top_text = None
534
  if isinstance(response, dict):
@@ -537,8 +492,7 @@ if generate_now and not st.session_state.get("busy"):
537
  top_text = getattr(response, "text", None) or getattr(response, "message", None)
538
  if top_text:
539
  text_pieces.append(str(top_text).strip())
540
-
541
- # dedupe preserving order
542
  seen = set()
543
  filtered = []
544
  for t in text_pieces:
@@ -548,14 +502,13 @@ if generate_now and not st.session_state.get("busy"):
548
  filtered.append(t)
549
  seen.add(t)
550
  out = "\n\n".join(filtered)
551
-
552
  except Exception as e:
553
  tb = traceback.format_exc()
554
  st.session_state["last_error"] = f"Responses API error: {e}\n\nDebug: {debug_info}\n\nTraceback:\n{tb}"
555
  st.error("An error occurred while generating the story. You can try Generate again; the uploaded video will be reused.")
556
  out = ""
557
 
558
- # post-process output
559
  if out:
560
  out = remove_prompt_echo(prompt_text, out)
561
  p = prompt_text
@@ -577,7 +530,7 @@ if generate_now and not st.session_state.get("busy"):
577
 
578
  except Exception as e:
579
  tb = traceback.format_exc()
580
- st.session_state["last_error"] = f"{e}\n\nDebug: {locals().get('debug_info', {})}\n\nTraceback:\n{tb}"
581
  st.error("An error occurred while generating the story. You can try Generate again; the uploaded video will be reused.")
582
  finally:
583
  st.session_state["busy"] = False
 
15
 
16
  load_dotenv()
17
 
 
18
  try:
19
  from phi.agent import Agent
20
  from phi.model.google import Gemini
 
37
  DATA_DIR = Path("./data")
38
  DATA_DIR.mkdir(exist_ok=True)
39
 
 
40
  st.session_state.setdefault("videos", "")
41
  st.session_state.setdefault("loop_video", False)
42
  st.session_state.setdefault("uploaded_file", None)
 
49
  st.session_state.setdefault("fast_mode", False)
50
  st.session_state.setdefault("api_key", os.getenv("GOOGLE_API_KEY", ""))
51
  st.session_state.setdefault("last_model", "")
 
52
  st.session_state.setdefault("last_url_value", "")
53
 
54
  def sanitize_filename(path_str: str):
55
+ return Path(path_str).name.lower().translate(str.maketrans("", "", string.punctuation)).replace(" ", "_")
 
56
 
57
  def file_sha256(path: str, block_size: int = 65536) -> str:
58
  h = hashlib.sha256()
 
103
  return None
104
  if isinstance(file_obj, dict):
105
  return file_obj.get("name") or file_obj.get("id")
106
+ for attr in ("name", "id", "fileId", "file_id"):
 
107
  if hasattr(file_obj, attr):
108
  val = getattr(file_obj, attr)
109
  if val:
110
  return val
 
111
  return str(file_obj)
112
 
113
  def get_effective_api_key():
114
  return st.session_state.get("api_key") or os.getenv("GOOGLE_API_KEY")
115
 
116
+ def maybe_configure_genai(key):
117
+ if not key or not HAS_GENAI:
 
118
  return False
119
  try:
120
  genai.configure(api_key=key)
121
+ return True
122
  except Exception:
123
+ return False
 
124
 
125
  _agent = None
126
  def maybe_create_agent(model_id: str):
 
153
  except Exception:
154
  pass
155
 
 
156
  current_url = st.session_state.get("url", "")
157
  if current_url != st.session_state.get("last_url_value"):
158
  clear_all_video_state()
 
162
  st.sidebar.text_input("Video URL", key="url", placeholder="https://")
163
 
164
  settings_exp = st.sidebar.expander("Settings", expanded=False)
165
+ settings_exp.text_input("Gemini Model (short name)", "gemini-2.5-flash-lite", key="model_input")
166
  settings_exp.text_input("Google API Key", key="api_key", value=os.getenv("GOOGLE_API_KEY", ""), type="password")
167
  default_prompt = (
168
  "Watch the video and provide a detailed behavioral report focusing on human actions, interactions, posture, movement, and apparent intent. Keep language professional. Include a list of observations for notable events."
169
  )
170
+ settings_exp.text_area("Enter analysis", value=default_prompt, height=140, key="analysis_prompt")
171
  settings_exp.text_input("Video Password (if needed)", key="video-password", placeholder="password", type="password")
172
  settings_exp.checkbox("Fast mode (skip compression, smaller model, fewer tokens)", key="fast_mode")
173
 
 
174
  key_source = "session" if st.session_state.get("api_key") else ".env" if os.getenv("GOOGLE_API_KEY") else "none"
175
  settings_exp.caption(f"Using API key from: **{key_source}**")
176
 
 
205
  try:
206
  obj = get_file(name)
207
  except Exception:
 
208
  return file_obj
209
  state = getattr(obj, "state", None)
210
  if not state or getattr(state, "name", None) != "PROCESSING":
 
285
  except Exception:
286
  pass
287
 
 
288
  if generate_now and not st.session_state.get("busy"):
289
  if not st.session_state.get("videos"):
290
  st.error("No video loaded. Use 'Load Video' in the sidebar.")
 
295
  else:
296
  try:
297
  st.session_state["busy"] = True
298
+ maybe_configure_genai(key_to_use)
 
 
 
 
299
 
300
  model_id = (st.session_state.get("model_input") or "gemini-2.5-flash-lite").strip()
301
  if st.session_state.get("last_model") != model_id:
 
340
  st.session_state["last_loaded_path"] = current_path
341
  st.session_state["file_hash"] = current_hash
342
 
343
+ prompt_text = (st.session_state.get("analysis_prompt", "").strip() or default_prompt).strip()
344
 
345
  out = ""
346
+ model_used = model_id
347
+ max_tokens = 512 if st.session_state.get("fast_mode") else 1024
 
 
 
 
 
348
  est_tokens = max_tokens
 
349
 
 
350
  agent = maybe_create_agent(model_used)
351
+ debug_info = {"agent_attempted": False, "agent_ok": False, "agent_error": None}
352
  if agent:
353
  debug_info["agent_attempted"] = True
354
  try:
355
+ with st.spinner("Generating via Agent..."):
356
  if not processed:
357
  raise RuntimeError("Processed file missing for agent generation")
 
358
  agent_response = agent.run(prompt_text, videos=[processed], safety_settings=safety_settings)
359
+ agent_text = getattr(agent_response, "content", None) or getattr(agent_response, "outputText", None)
360
+ if not agent_text and isinstance(agent_response, dict):
361
+ for k in ("content", "outputText", "text"):
362
+ if k in agent_response and agent_response[k]:
363
+ agent_text = agent_response[k]
364
+ break
 
 
 
 
 
 
 
365
  if agent_text and str(agent_text).strip():
366
  out = str(agent_text).strip()
367
  debug_info["agent_ok"] = True
 
 
 
 
368
  except Exception as ae:
369
+ debug_info["agent_error"] = f"{ae}\n{traceback.format_exc()}"
 
 
 
 
370
 
371
  if not out:
 
372
  try:
373
  if not HAS_GENAI or genai is None:
374
  raise RuntimeError("Responses API not available; install google.generativeai SDK.")
 
376
  fname = file_name_or_id(processed)
377
  if not fname:
378
  raise RuntimeError("Uploaded file missing name/id")
379
+
380
  system_msg = {"role": "system", "content": prompt_text}
381
  user_msg = {"role": "user", "content": "Please summarize the attached video."}
382
+
383
  response = None
384
+
385
+ # Attempt modern responses API
386
  try:
387
  if hasattr(genai, "responses") and hasattr(genai.responses, "generate"):
388
  response = genai.responses.generate(
 
394
  )
395
  except Exception:
396
  response = None
397
+
398
+ # Attempt GenerativeModel / fallback interfaces
399
  if response is None:
400
  try:
401
  if hasattr(genai, "GenerativeModel"):
 
406
  response = model_obj.generate([system_msg, user_msg], files=[{"name": fname}], max_output_tokens=max_tokens)
407
  except Exception:
408
  response = None
409
+
410
+ # Attempt legacy generate/create
411
  if response is None:
412
  try:
413
  if hasattr(genai, "generate"):
 
416
  response = genai.create(model=model_used, input=[{"text": prompt_text, "files": [{"name": fname}]}], max_output_tokens=max_tokens)
417
  except Exception:
418
  response = None
419
+
420
  if response is None:
421
+ raise RuntimeError("No supported generate method found on google.generativeai; check SDK version or model compatibility.")
422
+
 
423
  outputs = []
424
+ if isinstance(response, dict):
 
 
425
  for key in ("output", "candidates", "items", "responses"):
426
  val = response.get(key)
427
  if isinstance(val, list) and val:
 
438
  if isinstance(val, list) and val:
439
  outputs = val
440
  break
441
+
442
  if not isinstance(outputs, list):
443
  outputs = list(outputs) if outputs else []
444
+
 
445
  text_pieces = []
446
  for item in outputs:
447
  if item is None:
448
  continue
449
+ cand = None
450
  if isinstance(item, dict):
451
  for k in ("content", "text", "message", "output_text", "output"):
452
  if k in item and item[k]:
453
+ cand = item[k]
454
  break
455
  else:
456
  for k in ("content", "text", "message", "output", "output_text"):
457
+ cand = getattr(item, k, None)
458
+ if cand:
459
  break
460
+ if isinstance(cand, str):
461
+ if cand.strip():
462
+ text_pieces.append(cand.strip())
 
463
  continue
464
+ if isinstance(cand, (list, tuple)):
465
+ for c in cand:
 
466
  if c is None:
467
  continue
468
  if isinstance(c, str):
 
476
  if t:
477
  text_pieces.append(str(t).strip())
478
  continue
 
479
  direct = None
480
  if isinstance(item, dict):
481
  direct = item.get("text") or item.get("output_text") or item.get("message")
 
483
  direct = getattr(item, "text", None) or getattr(item, "output_text", None) or getattr(item, "message", None)
484
  if direct:
485
  text_pieces.append(str(direct).strip())
486
+
487
  if not text_pieces:
488
  top_text = None
489
  if isinstance(response, dict):
 
492
  top_text = getattr(response, "text", None) or getattr(response, "message", None)
493
  if top_text:
494
  text_pieces.append(str(top_text).strip())
495
+
 
496
  seen = set()
497
  filtered = []
498
  for t in text_pieces:
 
502
  filtered.append(t)
503
  seen.add(t)
504
  out = "\n\n".join(filtered)
505
+
506
  except Exception as e:
507
  tb = traceback.format_exc()
508
  st.session_state["last_error"] = f"Responses API error: {e}\n\nDebug: {debug_info}\n\nTraceback:\n{tb}"
509
  st.error("An error occurred while generating the story. You can try Generate again; the uploaded video will be reused.")
510
  out = ""
511
 
 
512
  if out:
513
  out = remove_prompt_echo(prompt_text, out)
514
  p = prompt_text
 
530
 
531
  except Exception as e:
532
  tb = traceback.format_exc()
533
+ st.session_state["last_error"] = f"{e}\n\nTraceback:\n{tb}"
534
  st.error("An error occurred while generating the story. You can try Generate again; the uploaded video will be reused.")
535
  finally:
536
  st.session_state["busy"] = False