CB committed on
Commit
44f1bd9
·
verified ·
1 Parent(s): a1e5710

Update streamlit_app.py

Browse files
Files changed (1) hide show
  1. streamlit_app.py +25 -69
streamlit_app.py CHANGED
@@ -6,18 +6,13 @@ import hashlib
6
  import traceback
7
  from glob import glob
8
  from pathlib import Path
9
- import json
10
  import logging
11
- import mimetypes
12
-
13
  import yt_dlp
14
  import ffmpeg
15
  import streamlit as st
16
  from dotenv import load_dotenv
17
  from difflib import SequenceMatcher
18
- import requests
19
 
20
- # Try import google.generativeai, support multiple SDK shapes
21
  try:
22
  import google.generativeai as genai # type: ignore
23
  genai_responses = getattr(genai, "responses", None) or getattr(genai, "Responses", None)
@@ -32,7 +27,6 @@ except Exception:
32
  HAS_GENAI = False
33
 
34
  load_dotenv()
35
-
36
  logging.basicConfig(level=logging.INFO)
37
  logger = logging.getLogger("video_ai")
38
  logger.propagate = False
@@ -41,6 +35,7 @@ st.set_page_config(page_title="Generate the story of videos", layout="wide")
41
  DATA_DIR = Path("./data")
42
  DATA_DIR.mkdir(exist_ok=True)
43
 
 
44
  st.session_state.setdefault("videos", "")
45
  st.session_state.setdefault("loop_video", False)
46
  st.session_state.setdefault("uploaded_file", None)
@@ -57,6 +52,8 @@ st.session_state.setdefault("processing_timeout", 900)
57
  st.session_state.setdefault("generation_timeout", 300)
58
  st.session_state.setdefault("preferred_model", "gemini-2.0-flash-lite")
59
  st.session_state.setdefault("compression_threshold_mb", 200)
 
 
60
 
61
  MODEL_OPTIONS = [
62
  "gemini-2.5-flash",
@@ -177,7 +174,6 @@ def wait_for_processed(file_obj, timeout: int = None, progress_callback=None):
177
  time.sleep(backoff)
178
  backoff = min(backoff * 2, 8.0)
179
  continue
180
-
181
  state = getattr(obj, "state", None)
182
  state_name = getattr(state, "name", None) if state else None
183
  if progress_callback:
@@ -187,10 +183,8 @@ def wait_for_processed(file_obj, timeout: int = None, progress_callback=None):
187
  progress_callback(min(100, pct), elapsed, state_name)
188
  except Exception:
189
  pass
190
-
191
  if not state_name or state_name != "PROCESSING":
192
  return obj
193
-
194
  if time.time() - start > timeout:
195
  raise TimeoutError(f"File processing timed out after {int(time.time() - start)}s")
196
  time.sleep(backoff)
@@ -199,11 +193,11 @@ def wait_for_processed(file_obj, timeout: int = None, progress_callback=None):
199
  def _normalize_genai_response(response):
200
  if response is None:
201
  return ""
202
- if not isinstance(response, dict):
203
- try:
204
  response = json.loads(str(response))
205
- except Exception:
206
- pass
207
  candidate_lists = []
208
  if isinstance(response, dict):
209
  for key in ("output", "candidates", "items", "responses", "choices"):
@@ -270,52 +264,38 @@ def generate_via_responses_api(prompt_text: str, processed, model_used: str, max
270
  except Exception:
271
  pass
272
  fname = file_name_or_id(processed) or None
273
-
274
  system_msg = {"role": "system", "content": prompt_text}
275
  user_msg = {"role": "user", "content": f"Please summarize the attached video: {fname or '[uploaded file]'}."}
276
-
277
  call_variants = [
278
  ("responses.generate", {"model": model_used, "messages": [system_msg, user_msg], "files": [{"name": fname}] if fname else None, "max_output_tokens": max_tokens}),
279
  ("responses.generate_alt", {"model": model_used, "input": [{"text": prompt_text, "files": [{"name": fname}]}] if fname else None, "max_output_tokens": max_tokens}),
280
  ("legacy_create", {"model": model_used, "input": prompt_text, "file": fname, "max_output_tokens": max_tokens}),
281
  ]
282
-
283
  def is_transient_error(e_text: str):
284
  txt = str(e_text).lower()
285
  return any(k in txt for k in ("internal", "unavailable", "deadlineexceeded", "deadline exceeded", "timeout", "rate limit", "503", "502", "500"))
286
-
287
  start = time.time()
288
  last_exc = None
289
  backoff = 1.0
290
  attempts = 0
291
-
292
  while True:
293
  for method_name, payload in call_variants:
294
  attempts += 1
295
  try:
296
- if progress_callback:
297
- progress_callback("starting", int(time.time() - start), {"model": model_used, "attempt": attempts, "method": method_name})
298
-
299
  if genai_responses is not None and hasattr(genai_responses, "generate"):
300
  payload = {k: v for k, v in payload.items() if v is not None}
301
  resp = genai_responses.generate(**payload)
302
  text = _normalize_genai_response(resp)
303
- if progress_callback:
304
- progress_callback("done", int(time.time() - start), {"method": method_name})
305
  if text and ("please provide the video" in text.lower() or "upload the video" in text.lower()):
306
  raise RuntimeError("Model indicates it didn't receive the file")
307
  return text
308
-
309
  if hasattr(genai, "Responses") and hasattr(genai.Responses, "create"):
310
  payload = {k: v for k, v in payload.items() if v is not None}
311
  resp = genai.Responses.create(**payload) # type: ignore
312
  text = _normalize_genai_response(resp)
313
- if progress_callback:
314
- progress_callback("done", int(time.time() - start), {"method": method_name})
315
  if text and ("please provide the video" in text.lower() or "upload the video" in text.lower()):
316
  raise RuntimeError("Model indicates it didn't receive the file")
317
  return text
318
-
319
  if hasattr(genai, "GenerativeModel"):
320
  try:
321
  model_obj = genai.GenerativeModel(model_name=model_used)
@@ -330,14 +310,11 @@ def generate_via_responses_api(prompt_text: str, processed, model_used: str, max
330
  resp = send(prompt_text)
331
  text = getattr(resp, "text", None) or str(resp)
332
  text = text if text else _normalize_genai_response(resp)
333
- if progress_callback:
334
- progress_callback("done", int(time.time() - start), {"method": "GenerativeModel.chat"})
335
  if text and ("please provide the video" in text.lower() or "upload the video" in text.lower()):
336
  raise RuntimeError("Model indicates it didn't receive the file")
337
  return text
338
  except Exception:
339
  logger.exception("GenerativeModel.chat fallback failed")
340
-
341
  raise RuntimeError("No supported response generation method available in installed google-generativeai package.")
342
  except Exception as e:
343
  last_exc = e
@@ -374,8 +351,8 @@ def remove_prompt_echo(prompt: str, text: str, check_len: int = 600, ratio_thres
374
  return b_full[len(ph):].lstrip(" \n:-")
375
  return text
376
 
377
- # UI reset on URL change
378
- current_url = st.session_state.get("url", "")
379
  if current_url != st.session_state.get("last_url_value"):
380
  st.session_state["videos"] = ""
381
  st.session_state["last_loaded_path"] = ""
@@ -392,46 +369,26 @@ if current_url != st.session_state.get("last_url_value"):
392
  st.session_state["last_url_value"] = current_url
393
 
394
  st.sidebar.header("Video Input")
395
- st.sidebar.text_input("Video URL", key="url_input", placeholder="https://", value=st.session_state.get("url", ""))
396
 
397
  settings_exp = st.sidebar.expander("Settings", expanded=False)
398
- chosen = settings_exp.selectbox("Gemini model", MODEL_OPTIONS, index=MODEL_OPTIONS.index(st.session_state.get("preferred_model", "gemini-2.0-flash-lite")), key="model_select")
399
- custom_model = ""
400
- if settings_exp.session_state.get("model_select") == "custom":
401
- custom_model = settings_exp.text_input("Custom model name", value=st.session_state.get("preferred_model", "gemini-2.0-flash-lite"), key="custom_model")
402
- model_input_value = (custom_model.strip() if custom_model else settings_exp.session_state.get("model_select")).strip()
403
 
404
- settings_exp.text_input("Google API Key", key="api_key_input", value=st.session_state.get("api_key", ""), type="password")
405
- st.session_state["api_key"] = settings_exp.session_state.get("api_key_input", st.session_state.get("api_key", ""))
406
 
407
  default_prompt = (
408
  "Watch the video and provide a detailed behavioral report focusing on human actions, interactions, posture, movement, and apparent intent. Keep language professional. Include a list of observations for notable events."
409
  )
410
- analysis_prompt = settings_exp.text_area("Enter analysis prompt", value=st.session_state.get("analysis_prompt", default_prompt), height=140, key="analysis_prompt")
411
- st.session_state["analysis_prompt"] = settings_exp.session_state.get("analysis_prompt", default_prompt)
412
 
413
- settings_exp.text_input("Video Password (if needed)", key="video_password_input", placeholder="password", type="password")
414
 
415
- settings_exp.number_input(
416
- "Processing timeout (s)", min_value=60, max_value=3600,
417
- value=st.session_state.get("processing_timeout", 900), step=30,
418
- key="processing_timeout_input",
419
- )
420
- st.session_state["processing_timeout"] = settings_exp.session_state.get("processing_timeout_input", st.session_state.get("processing_timeout", 900))
421
-
422
- settings_exp.number_input(
423
- "Generation timeout (s)", min_value=30, max_value=1800,
424
- value=st.session_state.get("generation_timeout", 300), step=10,
425
- key="generation_timeout_input",
426
- )
427
- st.session_state["generation_timeout"] = settings_exp.session_state.get("generation_timeout_input", st.session_state.get("generation_timeout", 300))
428
-
429
- settings_exp.number_input(
430
- "Compression threshold (MB)", min_value=10, max_value=2000,
431
- value=st.session_state.get("compression_threshold_mb", 200), step=10,
432
- key="compression_threshold_input",
433
- )
434
- st.session_state["compression_threshold_mb"] = settings_exp.session_state.get("compression_threshold_input", st.session_state.get("compression_threshold_mb", 200))
435
 
436
  key_source = "session" if st.session_state.get("api_key") else ".env" if os.getenv("GOOGLE_API_KEY") else "none"
437
  settings_exp.caption(f"Using API key from: {key_source}")
@@ -447,8 +404,8 @@ with col2:
447
 
448
  if st.sidebar.button("Load Video", use_container_width=True, key="load_video_btn"):
449
  try:
450
- vpw = settings_exp.session_state.get("video_password_input", "")
451
- path = download_video_ytdlp(st.session_state.get("url", settings_exp.session_state.get("url_input", "")), str(DATA_DIR), vpw)
452
  st.session_state["videos"] = path
453
  st.session_state["last_loaded_path"] = path
454
  st.session_state.pop("uploaded_file", None)
@@ -468,7 +425,7 @@ if st.session_state["videos"]:
468
 
469
  with st.sidebar.expander("Options", expanded=False):
470
  loop_checkbox = st.checkbox("Enable Loop", value=st.session_state.get("loop_video", False), key="loop_checkbox")
471
- st.session_state["loop_video"] = settings_exp.session_state.get("loop_checkbox", st.session_state.get("loop_video", False))
472
 
473
  if st.button("Clear Video(s)", key="clear_videos_btn"):
474
  st.session_state["videos"] = ""
@@ -501,7 +458,7 @@ if st.session_state["videos"]:
501
  except Exception:
502
  pass
503
 
504
- # Generation flow
505
  if generate_now and not st.session_state.get("busy"):
506
  if not st.session_state.get("videos"):
507
  st.error("No video loaded. Use 'Load Video' in the sidebar.")
@@ -537,7 +494,6 @@ if generate_now and not st.session_state.get("busy"):
537
  if not HAS_GENAI or upload_file is None:
538
  raise RuntimeError("google.generativeai SDK or upload support unavailable; cannot upload video. Use SDK with upload_file support.")
539
  local_path = current_path
540
-
541
  try:
542
  file_size_mb = os.path.getsize(local_path) / (1024 * 1024)
543
  except Exception:
@@ -583,7 +539,7 @@ if generate_now and not st.session_state.get("busy"):
583
  st.session_state["last_loaded_path"] = current_path
584
  st.session_state["file_hash"] = current_hash
585
 
586
- prompt_text = (st.session_state.get("analysis_prompt", "") or default_prompt).strip()
587
  out = ""
588
  model_used = model_id
589
  max_tokens = 2048 if "2.5" in model_used else 1024
 
6
  import traceback
7
  from glob import glob
8
  from pathlib import Path
 
9
  import logging
 
 
10
  import yt_dlp
11
  import ffmpeg
12
  import streamlit as st
13
  from dotenv import load_dotenv
14
  from difflib import SequenceMatcher
 
15
 
 
16
  try:
17
  import google.generativeai as genai # type: ignore
18
  genai_responses = getattr(genai, "responses", None) or getattr(genai, "Responses", None)
 
27
  HAS_GENAI = False
28
 
29
  load_dotenv()
 
30
  logging.basicConfig(level=logging.INFO)
31
  logger = logging.getLogger("video_ai")
32
  logger.propagate = False
 
35
  DATA_DIR = Path("./data")
36
  DATA_DIR.mkdir(exist_ok=True)
37
 
38
+ # session defaults
39
  st.session_state.setdefault("videos", "")
40
  st.session_state.setdefault("loop_video", False)
41
  st.session_state.setdefault("uploaded_file", None)
 
52
  st.session_state.setdefault("generation_timeout", 300)
53
  st.session_state.setdefault("preferred_model", "gemini-2.0-flash-lite")
54
  st.session_state.setdefault("compression_threshold_mb", 200)
55
+ st.session_state.setdefault("model_select", st.session_state.get("preferred_model"))
56
+ st.session_state.setdefault("custom_model", "")
57
 
58
  MODEL_OPTIONS = [
59
  "gemini-2.5-flash",
 
174
  time.sleep(backoff)
175
  backoff = min(backoff * 2, 8.0)
176
  continue
 
177
  state = getattr(obj, "state", None)
178
  state_name = getattr(state, "name", None) if state else None
179
  if progress_callback:
 
183
  progress_callback(min(100, pct), elapsed, state_name)
184
  except Exception:
185
  pass
 
186
  if not state_name or state_name != "PROCESSING":
187
  return obj
 
188
  if time.time() - start > timeout:
189
  raise TimeoutError(f"File processing timed out after {int(time.time() - start)}s")
190
  time.sleep(backoff)
 
193
  def _normalize_genai_response(response):
194
  if response is None:
195
  return ""
196
+ try:
197
+ if not isinstance(response, dict):
198
  response = json.loads(str(response))
199
+ except Exception:
200
+ pass
201
  candidate_lists = []
202
  if isinstance(response, dict):
203
  for key in ("output", "candidates", "items", "responses", "choices"):
 
264
  except Exception:
265
  pass
266
  fname = file_name_or_id(processed) or None
 
267
  system_msg = {"role": "system", "content": prompt_text}
268
  user_msg = {"role": "user", "content": f"Please summarize the attached video: {fname or '[uploaded file]'}."}
 
269
  call_variants = [
270
  ("responses.generate", {"model": model_used, "messages": [system_msg, user_msg], "files": [{"name": fname}] if fname else None, "max_output_tokens": max_tokens}),
271
  ("responses.generate_alt", {"model": model_used, "input": [{"text": prompt_text, "files": [{"name": fname}]}] if fname else None, "max_output_tokens": max_tokens}),
272
  ("legacy_create", {"model": model_used, "input": prompt_text, "file": fname, "max_output_tokens": max_tokens}),
273
  ]
 
274
  def is_transient_error(e_text: str):
275
  txt = str(e_text).lower()
276
  return any(k in txt for k in ("internal", "unavailable", "deadlineexceeded", "deadline exceeded", "timeout", "rate limit", "503", "502", "500"))
 
277
  start = time.time()
278
  last_exc = None
279
  backoff = 1.0
280
  attempts = 0
 
281
  while True:
282
  for method_name, payload in call_variants:
283
  attempts += 1
284
  try:
 
 
 
285
  if genai_responses is not None and hasattr(genai_responses, "generate"):
286
  payload = {k: v for k, v in payload.items() if v is not None}
287
  resp = genai_responses.generate(**payload)
288
  text = _normalize_genai_response(resp)
 
 
289
  if text and ("please provide the video" in text.lower() or "upload the video" in text.lower()):
290
  raise RuntimeError("Model indicates it didn't receive the file")
291
  return text
 
292
  if hasattr(genai, "Responses") and hasattr(genai.Responses, "create"):
293
  payload = {k: v for k, v in payload.items() if v is not None}
294
  resp = genai.Responses.create(**payload) # type: ignore
295
  text = _normalize_genai_response(resp)
 
 
296
  if text and ("please provide the video" in text.lower() or "upload the video" in text.lower()):
297
  raise RuntimeError("Model indicates it didn't receive the file")
298
  return text
 
299
  if hasattr(genai, "GenerativeModel"):
300
  try:
301
  model_obj = genai.GenerativeModel(model_name=model_used)
 
310
  resp = send(prompt_text)
311
  text = getattr(resp, "text", None) or str(resp)
312
  text = text if text else _normalize_genai_response(resp)
 
 
313
  if text and ("please provide the video" in text.lower() or "upload the video" in text.lower()):
314
  raise RuntimeError("Model indicates it didn't receive the file")
315
  return text
316
  except Exception:
317
  logger.exception("GenerativeModel.chat fallback failed")
 
318
  raise RuntimeError("No supported response generation method available in installed google-generativeai package.")
319
  except Exception as e:
320
  last_exc = e
 
351
  return b_full[len(ph):].lstrip(" \n:-")
352
  return text
353
 
354
+ # reset on URL change
355
+ current_url = st.session_state.get("url_input", "")
356
  if current_url != st.session_state.get("last_url_value"):
357
  st.session_state["videos"] = ""
358
  st.session_state["last_loaded_path"] = ""
 
369
  st.session_state["last_url_value"] = current_url
370
 
371
  st.sidebar.header("Video Input")
372
+ st.sidebar.text_input("Video URL", key="url_input", placeholder="https://", value=st.session_state.get("url_input", ""))
373
 
374
  settings_exp = st.sidebar.expander("Settings", expanded=False)
375
+ st.session_state["model_select"] = settings_exp.selectbox("Gemini model", MODEL_OPTIONS, index=MODEL_OPTIONS.index(st.session_state.get("model_select", "gemini-2.0-flash-lite")), key="model_select")
376
+ if st.session_state.get("model_select") == "custom":
377
+ st.session_state["custom_model"] = settings_exp.text_input("Custom model name", value=st.session_state.get("custom_model", ""), key="custom_model")
378
+ model_input_value = (st.session_state.get("custom_model") or st.session_state.get("model_select")).strip()
 
379
 
380
+ st.session_state["api_key"] = settings_exp.text_input("Google API Key", key="api_key_input", value=st.session_state.get("api_key", ""), type="password")
 
381
 
382
  default_prompt = (
383
  "Watch the video and provide a detailed behavioral report focusing on human actions, interactions, posture, movement, and apparent intent. Keep language professional. Include a list of observations for notable events."
384
  )
385
+ st.session_state["analysis_prompt"] = settings_exp.text_area("Enter analysis prompt", value=st.session_state.get("analysis_prompt", default_prompt), height=140, key="analysis_prompt")
 
386
 
387
+ st.session_state["video_password"] = settings_exp.text_input("Video Password (if needed)", key="video_password_input", placeholder="password", type="password")
388
 
389
+ st.session_state["processing_timeout"] = settings_exp.number_input("Processing timeout (s)", min_value=60, max_value=3600, value=st.session_state.get("processing_timeout", 900), step=30, key="processing_timeout_input")
390
+ st.session_state["generation_timeout"] = settings_exp.number_input("Generation timeout (s)", min_value=30, max_value=1800, value=st.session_state.get("generation_timeout", 300), step=10, key="generation_timeout_input")
391
+ st.session_state["compression_threshold_mb"] = settings_exp.number_input("Compression threshold (MB)", min_value=10, max_value=2000, value=st.session_state.get("compression_threshold_mb", 200), step=10, key="compression_threshold_input")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
392
 
393
  key_source = "session" if st.session_state.get("api_key") else ".env" if os.getenv("GOOGLE_API_KEY") else "none"
394
  settings_exp.caption(f"Using API key from: {key_source}")
 
404
 
405
  if st.sidebar.button("Load Video", use_container_width=True, key="load_video_btn"):
406
  try:
407
+ vpw = st.session_state.get("video_password", "")
408
+ path = download_video_ytdlp(st.session_state.get("url_input", ""), str(DATA_DIR), vpw)
409
  st.session_state["videos"] = path
410
  st.session_state["last_loaded_path"] = path
411
  st.session_state.pop("uploaded_file", None)
 
425
 
426
  with st.sidebar.expander("Options", expanded=False):
427
  loop_checkbox = st.checkbox("Enable Loop", value=st.session_state.get("loop_video", False), key="loop_checkbox")
428
+ st.session_state["loop_video"] = st.session_state.get("loop_checkbox", st.session_state.get("loop_video", False))
429
 
430
  if st.button("Clear Video(s)", key="clear_videos_btn"):
431
  st.session_state["videos"] = ""
 
458
  except Exception:
459
  pass
460
 
461
+ # generation flow
462
  if generate_now and not st.session_state.get("busy"):
463
  if not st.session_state.get("videos"):
464
  st.error("No video loaded. Use 'Load Video' in the sidebar.")
 
494
  if not HAS_GENAI or upload_file is None:
495
  raise RuntimeError("google.generativeai SDK or upload support unavailable; cannot upload video. Use SDK with upload_file support.")
496
  local_path = current_path
 
497
  try:
498
  file_size_mb = os.path.getsize(local_path) / (1024 * 1024)
499
  except Exception:
 
539
  st.session_state["last_loaded_path"] = current_path
540
  st.session_state["file_hash"] = current_hash
541
 
542
+ prompt_text = (st.session_state.get("analysis_prompt", "") or "").strip() or default_prompt
543
  out = ""
544
  model_used = model_id
545
  max_tokens = 2048 if "2.5" in model_used else 1024