CB committed on
Commit
eb05733
·
verified ·
1 Parent(s): 18c6ab8

Update streamlit_app.py

Browse files
Files changed (1) hide show
  1. streamlit_app.py +64 -23
streamlit_app.py CHANGED
@@ -50,6 +50,7 @@ st.session_state.setdefault("fast_mode", False)
50
  st.session_state.setdefault("api_key", os.getenv("GOOGLE_API_KEY", ""))
51
  st.session_state.setdefault("last_model", "")
52
  st.session_state.setdefault("upload_progress", {"uploaded": 0, "total": 0})
 
53
 
54
  def sanitize_filename(path_str: str):
55
  name = Path(path_str).name
@@ -93,7 +94,8 @@ def download_video_ytdlp(url: str, save_dir: str, video_password: str = None) ->
93
  if video_id:
94
  matches = glob(os.path.join(save_dir, f"{video_id}.*"))
95
  else:
96
- matches = sorted(glob(os.path.join(save_dir, "*")), key=os.path.getmtime, reverse=True)[:1]
 
97
  if not matches:
98
  raise FileNotFoundError("Downloaded video not found")
99
  return convert_video_to_mp4(matches[0])
@@ -115,7 +117,6 @@ def configure_genai_if_needed():
115
  try:
116
  genai.configure(api_key=key)
117
  except Exception:
118
- # ignore here; callers will handle failures
119
  pass
120
  return True
121
 
@@ -150,8 +151,7 @@ def clear_all_video_state():
150
  except Exception:
151
  pass
152
 
153
- if "last_url_value" not in st.session_state:
154
- st.session_state["last_url_value"] = st.session_state.get("url", "")
155
  current_url = st.session_state.get("url", "")
156
  if current_url != st.session_state.get("last_url_value"):
157
  clear_all_video_state()
@@ -161,12 +161,10 @@ st.sidebar.header("Video Input")
161
  st.sidebar.text_input("Video URL", key="url", placeholder="https://")
162
 
163
  settings_exp = st.sidebar.expander("Settings", expanded=False)
164
- model_input = settings_exp.text_input("Gemini Model (short name)", "gemini-2.0-flash-lite", key="model_input")
165
- # session API key widget (session-first, fallback to .env)
166
  settings_exp.text_input("Google API Key", key="api_key", value=os.getenv("GOOGLE_API_KEY", ""), type="password")
167
  default_prompt = (
168
- "Watch the video and provide a detailed behavioral report focusing on human actions, interactions, posture, movement, and apparent intent. "
169
- "Keep language professional and avoid anatomically explicit or sensitive detail. Include a list of observations and any timestamps for notable events."
170
  )
171
  analysis_prompt = settings_exp.text_area("Enter analysis", value=default_prompt, height=140)
172
  settings_exp.text_input("Video Password (if needed)", key="video-password", placeholder="password", type="password")
@@ -193,7 +191,6 @@ def upload_video_sdk(filepath: str):
193
  if not HAS_GENAI or upload_file is None:
194
  raise RuntimeError("google.generativeai SDK not available; cannot upload")
195
  genai.configure(api_key=key)
196
- # upload_file may stream; wrap to update session progress if supported
197
  return upload_file(filepath)
198
 
199
  def wait_for_processed(file_obj, timeout=180):
@@ -247,7 +244,10 @@ if st.sidebar.button("Load Video", use_container_width=True):
247
  st.session_state["last_loaded_path"] = path
248
  st.session_state.pop("uploaded_file", None)
249
  st.session_state.pop("processed_file", None)
250
- st.session_state["file_hash"] = file_sha256(path)
 
 
 
251
  except Exception as e:
252
  st.sidebar.error(f"Failed to load video: {e}")
253
 
@@ -271,16 +271,16 @@ if st.session_state["videos"]:
271
  st.sidebar.error("Failed to prepare download")
272
 
273
  st.sidebar.write("Title:", Path(st.session_state["videos"]).name)
274
- # show file size and compression suggestion
275
  try:
276
  file_size_mb = os.path.getsize(st.session_state["videos"]) / (1024 * 1024)
277
  st.sidebar.caption(f"File size: {file_size_mb:.1f} MB")
278
  if file_size_mb > 50 and not st.session_state.get("fast_mode", False):
279
  st.sidebar.warning("Large file detected — consider enabling Fast mode or compression.", icon="⚠️")
280
- st.session_state["fast_mode"] = True
281
  except Exception:
282
  pass
283
 
 
284
  if generate_now and not st.session_state.get("busy"):
285
  if not st.session_state.get("videos"):
286
  st.error("No video loaded. Use 'Load Video' in the sidebar.")
@@ -293,14 +293,14 @@ if generate_now and not st.session_state.get("busy"):
293
  st.session_state["busy"] = True
294
  # ensure genai is configured now
295
  try:
296
- genai.configure(api_key=key_to_use)
 
297
  except Exception:
298
  pass
299
 
300
  # recreate/clear agent if key or model changed
301
- model_id = (st.session_state.get("model_input") or "gemini-2.0-flash-lite").strip()
302
  if st.session_state.get("last_model") != model_id:
303
- # clear cached agent to rebuild with new model/key
304
  st.session_state["last_model"] = ""
305
  maybe_create_agent(model_id)
306
 
@@ -346,7 +346,7 @@ if generate_now and not st.session_state.get("busy"):
346
 
347
  out = ""
348
  if st.session_state.get("fast_mode"):
349
- model_used = model_id if model_id else "gemini-2.0-flash-lite"
350
  max_tokens = 512
351
  else:
352
  model_used = model_id
@@ -360,6 +360,9 @@ if generate_now and not st.session_state.get("busy"):
360
  agent = maybe_create_agent(model_used)
361
  if agent:
362
  with st.spinner("Generating description via Agent..."):
 
 
 
363
  response = agent.run(prompt_text, videos=[processed], safety_settings=safety_settings)
364
  out = getattr(response, "content", None) or getattr(response, "outputText", None) or str(response)
365
  else:
@@ -371,6 +374,7 @@ if generate_now and not st.session_state.get("busy"):
371
  raise RuntimeError("Uploaded file missing name/id")
372
  system_msg = {"role": "system", "content": prompt_text}
373
  user_msg = {"role": "user", "content": "Please summarize the attached video."}
 
374
  try:
375
  response = genai.responses.generate(
376
  model=model_used,
@@ -387,23 +391,59 @@ if generate_now and not st.session_state.get("busy"):
387
  max_output_tokens=max_tokens,
388
  )
389
 
390
- outputs = getattr(response, "output", None) or (response.get("output") if isinstance(response, dict) else None) or []
391
- if not outputs and isinstance(response, dict):
392
- outputs = response.get("output", [])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
393
 
394
  text_pieces = []
395
- for item in outputs or []:
396
- contents = getattr(item, "content", None) or (item.get("content") if isinstance(item, dict) else None) or []
397
- for c in contents:
 
 
 
 
 
 
 
 
 
 
 
 
 
398
  ctype = getattr(c, "type", None) or (c.get("type") if isinstance(c, dict) else None)
399
  if ctype in ("output_text", "text") or ctype is None:
400
  txt = getattr(c, "text", None) or (c.get("text") if isinstance(c, dict) else None)
401
  if txt:
402
  text_pieces.append(txt)
 
 
403
  if not text_pieces:
404
  top_text = getattr(response, "text", None) or (response.get("text") if isinstance(response, dict) else None)
405
  if top_text:
406
  text_pieces.append(top_text)
 
 
407
  seen = set()
408
  filtered = []
409
  for t in text_pieces:
@@ -412,6 +452,7 @@ if generate_now and not st.session_state.get("busy"):
412
  seen.add(t)
413
  out = "\n\n".join(filtered)
414
 
 
415
  if out:
416
  out = remove_prompt_echo(prompt_text, out)
417
  p = prompt_text
@@ -428,7 +469,7 @@ if generate_now and not st.session_state.get("busy"):
428
  st.session_state["analysis_out"] = out
429
  st.session_state["last_error"] = ""
430
  st.subheader("Analysis Result")
431
- st.markdown(out)
432
  st.caption(est_cost_caption)
433
  except Exception as e:
434
  st.session_state["last_error"] = str(e)
 
50
  st.session_state.setdefault("api_key", os.getenv("GOOGLE_API_KEY", ""))
51
  st.session_state.setdefault("last_model", "")
52
  st.session_state.setdefault("upload_progress", {"uploaded": 0, "total": 0})
53
+ st.session_state.setdefault("last_url_value", "")
54
 
55
  def sanitize_filename(path_str: str):
56
  name = Path(path_str).name
 
94
  if video_id:
95
  matches = glob(os.path.join(save_dir, f"{video_id}.*"))
96
  else:
97
+ all_files = glob(os.path.join(save_dir, "*"))
98
+ matches = sorted(all_files, key=os.path.getmtime, reverse=True)[:1] if all_files else []
99
  if not matches:
100
  raise FileNotFoundError("Downloaded video not found")
101
  return convert_video_to_mp4(matches[0])
 
117
  try:
118
  genai.configure(api_key=key)
119
  except Exception:
 
120
  pass
121
  return True
122
 
 
151
  except Exception:
152
  pass
153
 
154
+ # track url changes
 
155
  current_url = st.session_state.get("url", "")
156
  if current_url != st.session_state.get("last_url_value"):
157
  clear_all_video_state()
 
161
  st.sidebar.text_input("Video URL", key="url", placeholder="https://")
162
 
163
  settings_exp = st.sidebar.expander("Settings", expanded=False)
164
+ model_input = settings_exp.text_input("Gemini Model (short name)", "gemini-2.5-flash-lite", key="model_input")
 
165
  settings_exp.text_input("Google API Key", key="api_key", value=os.getenv("GOOGLE_API_KEY", ""), type="password")
166
  default_prompt = (
167
+ "Watch the video and provide a detailed behavioral report focusing on human actions, interactions, posture, movement, and apparent intent. Keep language professional. Include a list of observations for notable events."
 
168
  )
169
  analysis_prompt = settings_exp.text_area("Enter analysis", value=default_prompt, height=140)
170
  settings_exp.text_input("Video Password (if needed)", key="video-password", placeholder="password", type="password")
 
191
  if not HAS_GENAI or upload_file is None:
192
  raise RuntimeError("google.generativeai SDK not available; cannot upload")
193
  genai.configure(api_key=key)
 
194
  return upload_file(filepath)
195
 
196
  def wait_for_processed(file_obj, timeout=180):
 
244
  st.session_state["last_loaded_path"] = path
245
  st.session_state.pop("uploaded_file", None)
246
  st.session_state.pop("processed_file", None)
247
+ try:
248
+ st.session_state["file_hash"] = file_sha256(path)
249
+ except Exception:
250
+ st.session_state["file_hash"] = None
251
  except Exception as e:
252
  st.sidebar.error(f"Failed to load video: {e}")
253
 
 
271
  st.sidebar.error("Failed to prepare download")
272
 
273
  st.sidebar.write("Title:", Path(st.session_state["videos"]).name)
 
274
  try:
275
  file_size_mb = os.path.getsize(st.session_state["videos"]) / (1024 * 1024)
276
  st.sidebar.caption(f"File size: {file_size_mb:.1f} MB")
277
  if file_size_mb > 50 and not st.session_state.get("fast_mode", False):
278
  st.sidebar.warning("Large file detected — consider enabling Fast mode or compression.", icon="⚠️")
279
+ # do not forcibly change user's fast_mode here; just recommend
280
  except Exception:
281
  pass
282
 
283
+ # --- Generation flow ---
284
  if generate_now and not st.session_state.get("busy"):
285
  if not st.session_state.get("videos"):
286
  st.error("No video loaded. Use 'Load Video' in the sidebar.")
 
293
  st.session_state["busy"] = True
294
  # ensure genai is configured now
295
  try:
296
+ if HAS_GENAI and genai is not None:
297
+ genai.configure(api_key=key_to_use)
298
  except Exception:
299
  pass
300
 
301
  # recreate/clear agent if key or model changed
302
+ model_id = (st.session_state.get("model_input") or "gemini-2.5-flash-lite").strip()
303
  if st.session_state.get("last_model") != model_id:
 
304
  st.session_state["last_model"] = ""
305
  maybe_create_agent(model_id)
306
 
 
346
 
347
  out = ""
348
  if st.session_state.get("fast_mode"):
349
+ model_used = model_id if model_id else "gemini-2.5-flash-lite"
350
  max_tokens = 512
351
  else:
352
  model_used = model_id
 
360
  agent = maybe_create_agent(model_used)
361
  if agent:
362
  with st.spinner("Generating description via Agent..."):
363
+ # guard processed for agent: must be non-none
364
+ if not processed:
365
+ raise RuntimeError("Processed file missing for agent generation")
366
  response = agent.run(prompt_text, videos=[processed], safety_settings=safety_settings)
367
  out = getattr(response, "content", None) or getattr(response, "outputText", None) or str(response)
368
  else:
 
374
  raise RuntimeError("Uploaded file missing name/id")
375
  system_msg = {"role": "system", "content": prompt_text}
376
  user_msg = {"role": "user", "content": "Please summarize the attached video."}
377
+ # call responses.generate, handling both signature variants and return shapes
378
  try:
379
  response = genai.responses.generate(
380
  model=model_used,
 
391
  max_output_tokens=max_tokens,
392
  )
393
 
394
+ # Normalize outputs to a list of items with .content or dict form
395
+ outputs = []
396
+ if response is None:
397
+ outputs = []
398
+ else:
399
+ outputs = getattr(response, "output", None) or (response.get("output") if isinstance(response, dict) else None) or []
400
+ # Newer API may use 'candidates' or 'items' -- check safely
401
+ if not outputs:
402
+ # check common alternative keys
403
+ if isinstance(response, dict):
404
+ if "candidates" in response and isinstance(response["candidates"], list):
405
+ outputs = response["candidates"]
406
+ elif "items" in response and isinstance(response["items"], list):
407
+ outputs = response["items"]
408
+ elif "output" in response and isinstance(response["output"], list):
409
+ outputs = response["output"]
410
+ else:
411
+ # attempt attribute access fallbacks
412
+ outputs = getattr(response, "candidates", None) or getattr(response, "items", None) or getattr(response, "output", None) or []
413
+ # ensure outputs is a list
414
+ if outputs is None:
415
+ outputs = []
416
 
417
  text_pieces = []
418
+ for item in outputs:
419
+ # item may be dict or object
420
+ contents = getattr(item, "content", None) or (item.get("content") if isinstance(item, dict) else None)
421
+ # some shapes put text directly on item
422
+ if contents is None:
423
+ # fallback to checking item text or string
424
+ txt_direct = getattr(item, "text", None) or (item.get("text") if isinstance(item, dict) else None)
425
+ if txt_direct:
426
+ text_pieces.append(txt_direct)
427
+ continue
428
+ # contents might be list or single dict/object
429
+ if isinstance(contents, (list, tuple)):
430
+ content_iter = contents
431
+ else:
432
+ content_iter = [contents]
433
+ for c in content_iter:
434
  ctype = getattr(c, "type", None) or (c.get("type") if isinstance(c, dict) else None)
435
  if ctype in ("output_text", "text") or ctype is None:
436
  txt = getattr(c, "text", None) or (c.get("text") if isinstance(c, dict) else None)
437
  if txt:
438
  text_pieces.append(txt)
439
+
440
+ # final fallback: top-level text fields
441
  if not text_pieces:
442
  top_text = getattr(response, "text", None) or (response.get("text") if isinstance(response, dict) else None)
443
  if top_text:
444
  text_pieces.append(top_text)
445
+
446
+ # dedupe while preserving order
447
  seen = set()
448
  filtered = []
449
  for t in text_pieces:
 
452
  seen.add(t)
453
  out = "\n\n".join(filtered)
454
 
455
+ # post-process output to remove prompt echo or placeholders
456
  if out:
457
  out = remove_prompt_echo(prompt_text, out)
458
  p = prompt_text
 
469
  st.session_state["analysis_out"] = out
470
  st.session_state["last_error"] = ""
471
  st.subheader("Analysis Result")
472
+ st.markdown(out if out else "No analysis returned.")
473
  st.caption(est_cost_caption)
474
  except Exception as e:
475
  st.session_state["last_error"] = str(e)