CB committed on
Commit
14e2c33
·
verified ·
1 Parent(s): f1f7841

Update streamlit_app.py

Browse files
Files changed (1) hide show
  1. streamlit_app.py +298 -294
streamlit_app.py CHANGED
@@ -6,36 +6,57 @@ import hashlib
6
  import traceback
7
  from glob import glob
8
  from pathlib import Path
9
- import logging
 
 
 
10
  import yt_dlp
11
  import ffmpeg
12
  import streamlit as st
13
  from dotenv import load_dotenv
14
- from difflib import SequenceMatcher
15
 
 
 
 
16
  try:
17
- import google.generativeai as genai # type: ignore
18
- genai_responses = getattr(genai, "responses", None) or getattr(genai, "Responses", None)
19
- upload_file = getattr(genai, "upload_file", None)
20
- get_file = getattr(genai, "get_file", None)
 
 
 
 
 
 
 
 
21
  HAS_GENAI = True
22
  except Exception:
23
  genai = None
24
- genai_responses = None
25
- upload_file = None
26
- get_file = None
27
  HAS_GENAI = False
28
 
29
- load_dotenv()
30
- logging.basicConfig(level=logging.INFO)
31
- logger = logging.getLogger("video_ai")
32
- logger.propagate = False
33
-
34
  st.set_page_config(page_title="Generate the story of videos", layout="wide")
35
  DATA_DIR = Path("./data")
36
  DATA_DIR.mkdir(exist_ok=True)
37
 
38
- # session defaults
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  st.session_state.setdefault("videos", "")
40
  st.session_state.setdefault("loop_video", False)
41
  st.session_state.setdefault("uploaded_file", None)
@@ -47,25 +68,13 @@ st.session_state.setdefault("last_error", "")
47
  st.session_state.setdefault("file_hash", None)
48
  st.session_state.setdefault("api_key", os.getenv("GOOGLE_API_KEY", ""))
49
  st.session_state.setdefault("last_model", "")
 
50
  st.session_state.setdefault("last_url_value", "")
51
- st.session_state.setdefault("processing_timeout", 900)
52
- st.session_state.setdefault("generation_timeout", 300)
53
- st.session_state.setdefault("preferred_model", "gemini-2.0-flash-lite")
54
- st.session_state.setdefault("compression_threshold_mb", 200)
55
- st.session_state.setdefault("model_select", st.session_state.get("preferred_model"))
56
- st.session_state.setdefault("custom_model", "")
57
- st.session_state.setdefault("url_input", "")
58
- st.session_state.setdefault("video_password", "")
59
- st.session_state.setdefault("analysis_prompt", "")
60
-
61
- MODEL_OPTIONS = [
62
- "gemini-2.5-flash",
63
- "gemini-2.5-flash-lite",
64
- "gemini-2.0-flash",
65
- "gemini-2.0-flash-lite",
66
- "custom",
67
- ]
68
 
 
69
def sanitize_filename(path_str: str):
    """Return the file's base name lowercased, stripped of punctuation, with spaces as underscores."""
    basename = Path(path_str).name.lower()
    no_punctuation = basename.translate(str.maketrans("", "", string.punctuation))
    return no_punctuation.replace(" ", "_")
@@ -88,17 +97,13 @@ def convert_video_to_mp4(video_path: str) -> str:
88
  pass
89
  return target_path
90
 
91
- def compress_video(input_path: str, target_path: str, crf: int = 28, preset: str = "fast", bitrate: str = None):
92
  try:
93
- out = ffmpeg.input(input_path)
94
- if bitrate:
95
- stream = out.output(target_path, vcodec="libx264", preset=preset, video_bitrate=bitrate)
96
- else:
97
- stream = out.output(target_path, vcodec="libx264", crf=crf, preset=preset)
98
- stream.run(overwrite_output=True, quiet=True)
99
- return target_path if os.path.exists(target_path) else input_path
100
  except Exception:
101
- logger.exception("Compression failed")
102
  return input_path
103
 
104
  def download_video_ytdlp(url: str, save_dir: str, video_password: str = None) -> str:
@@ -135,30 +140,109 @@ def configure_genai_if_needed():
135
  if not key:
136
  return False
137
  try:
138
- if genai is not None and hasattr(genai, "configure"):
139
- genai.configure(api_key=key)
140
  except Exception:
141
- logger.exception("Failed to configure genai")
142
  return True
143
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
144
  def upload_video_sdk(filepath: str):
145
  key = get_effective_api_key()
146
  if not key:
147
  raise RuntimeError("No API key provided")
148
  if not HAS_GENAI or upload_file is None:
149
  raise RuntimeError("google.generativeai SDK not available; cannot upload")
150
- try:
151
- if genai is not None and hasattr(genai, "configure"):
152
- genai.configure(api_key=key)
153
- except Exception:
154
- pass
155
- try:
156
- return upload_file(filepath)
157
- except Exception:
158
- logger.exception("Upload failed")
159
- raise
160
-
161
- def wait_for_processed(file_obj, timeout: int = None, progress_callback=None):
162
  if timeout is None:
163
  timeout = st.session_state.get("processing_timeout", 900)
164
  if not HAS_GENAI or get_file is None:
@@ -177,31 +261,107 @@ def wait_for_processed(file_obj, timeout: int = None, progress_callback=None):
177
  time.sleep(backoff)
178
  backoff = min(backoff * 2, 8.0)
179
  continue
 
180
  state = getattr(obj, "state", None)
181
- state_name = getattr(state, "name", None) if state else None
182
- if progress_callback:
183
- elapsed = int(time.time() - start)
184
- pct = 50 if state_name == "PROCESSING" else 100
185
- try:
186
- progress_callback(min(100, pct), elapsed, state_name)
187
- except Exception:
188
- pass
189
- if not state_name or state_name != "PROCESSING":
190
  return obj
 
191
  if time.time() - start > timeout:
192
  raise TimeoutError(f"File processing timed out after {int(time.time() - start)}s")
193
  time.sleep(backoff)
194
  backoff = min(backoff * 2, 8.0)
195
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
196
  def _normalize_genai_response(response):
197
- import json
198
  if response is None:
199
  return ""
200
- try:
201
- if not isinstance(response, dict):
 
202
  response = json.loads(str(response))
203
- except Exception:
204
- pass
 
205
  candidate_lists = []
206
  if isinstance(response, dict):
207
  for key in ("output", "candidates", "items", "responses", "choices"):
@@ -213,6 +373,7 @@ def _normalize_genai_response(response):
213
  if isinstance(v, list) and v:
214
  candidate_lists.append(v)
215
  break
 
216
  text_pieces = []
217
  for lst in candidate_lists:
218
  for item in lst:
@@ -242,12 +403,14 @@ def _normalize_genai_response(response):
242
  text_pieces.append(str(t).strip())
243
  except Exception:
244
  pass
 
245
  if not text_pieces and isinstance(response, dict):
246
  for k in ("text", "message", "output_text"):
247
  v = response.get(k)
248
  if v:
249
  text_pieces.append(str(v).strip())
250
  break
 
251
  seen = set()
252
  filtered = []
253
  for t in text_pieces:
@@ -258,169 +421,17 @@ def _normalize_genai_response(response):
258
  seen.add(t)
259
  return "\n\n".join(filtered).strip()
260
 
261
- def generate_via_responses_api(prompt_text: str, processed, model_used: str, max_tokens: int = 1024, timeout: int = 300, progress_callback=None):
262
- key = get_effective_api_key()
263
- if not key:
264
- raise RuntimeError("No API key provided")
265
- if genai is not None and hasattr(genai, "configure"):
266
- try:
267
- genai.configure(api_key=key)
268
- except Exception:
269
- pass
270
- fname = file_name_or_id(processed) or None
271
- system_msg = {"role": "system", "content": prompt_text}
272
- user_msg = {"role": "user", "content": f"Please summarize the attached video: {fname or '[uploaded file]'}."}
273
- call_variants = [
274
- ("responses.generate", {"model": model_used, "messages": [system_msg, user_msg], "files": [{"name": fname}] if fname else None, "max_output_tokens": max_tokens}),
275
- ("responses.generate_alt", {"model": model_used, "input": [{"text": prompt_text, "files": [{"name": fname}]}] if fname else None, "max_output_tokens": max_tokens}),
276
- ("legacy_create", {"model": model_used, "input": prompt_text, "file": fname, "max_output_tokens": max_tokens}),
277
- ]
278
- def is_transient_error(e_text: str):
279
- txt = str(e_text).lower()
280
- return any(k in txt for k in ("internal", "unavailable", "deadlineexceeded", "deadline exceeded", "timeout", "rate limit", "503", "502", "500"))
281
- start = time.time()
282
- last_exc = None
283
- backoff = 1.0
284
- attempts = 0
285
- while True:
286
- for method_name, payload in call_variants:
287
- attempts += 1
288
- try:
289
- if genai_responses is not None and hasattr(genai_responses, "generate"):
290
- payload = {k: v for k, v in payload.items() if v is not None}
291
- resp = genai_responses.generate(**payload)
292
- text = _normalize_genai_response(resp)
293
- if text and ("please provide the video" in text.lower() or "upload the video" in text.lower()):
294
- raise RuntimeError("Model indicates it didn't receive the file")
295
- return text
296
- if hasattr(genai, "Responses") and hasattr(genai.Responses, "create"):
297
- payload = {k: v for k, v in payload.items() if v is not None}
298
- resp = genai.Responses.create(**payload) # type: ignore
299
- text = _normalize_genai_response(resp)
300
- if text and ("please provide the video" in text.lower() or "upload the video" in text.lower()):
301
- raise RuntimeError("Model indicates it didn't receive the file")
302
- return text
303
- if hasattr(genai, "GenerativeModel"):
304
- try:
305
- model_obj = genai.GenerativeModel(model_name=model_used)
306
- if hasattr(model_obj, "start_chat"):
307
- chat = model_obj.start_chat()
308
- send = getattr(chat, "send_message", None)
309
- if send is None:
310
- raise RuntimeError("ChatSession has no send_message")
311
- try:
312
- resp = send(prompt_text, timeout=timeout)
313
- except TypeError:
314
- resp = send(prompt_text)
315
- text = getattr(resp, "text", None) or str(resp)
316
- text = text if text else _normalize_genai_response(resp)
317
- if text and ("please provide the video" in text.lower() or "upload the video" in text.lower()):
318
- raise RuntimeError("Model indicates it didn't receive the file")
319
- return text
320
- except Exception:
321
- logger.exception("GenerativeModel.chat fallback failed")
322
- raise RuntimeError("No supported response generation method available in installed google-generativeai package.")
323
- except Exception as e:
324
- last_exc = e
325
- msg = str(e)
326
- logger.warning("Generation error (model=%s attempt=%s method=%s): %s", model_used, attempts, method_name, msg)
327
- if not is_transient_error(msg):
328
- if "No supported response generation method" in msg or "has no attribute" in msg or "didn't receive the file" in msg:
329
- raise RuntimeError(
330
- "Installed google-generativeai package may not expose a compatible Responses API or the SDK didn't attach the file correctly. "
331
- "Try upgrading the SDK: pip install --upgrade google-generativeai."
332
- ) from e
333
- raise
334
- if time.time() - start > timeout:
335
- raise TimeoutError(f"Responses.generate timed out after {timeout}s: last error: {last_exc}")
336
- time.sleep(backoff)
337
- backoff = min(backoff * 2, 8.0)
338
-
339
- def remove_prompt_echo(prompt: str, text: str, check_len: int = 600, ratio_threshold: float = 0.68):
340
- if not prompt or not text:
341
- return text
342
- a = " ".join(prompt.strip().lower().split())
343
- b_full = text.strip()
344
- b = " ".join(b_full[:check_len].lower().split())
345
- ratio = SequenceMatcher(None, a, b).ratio()
346
- if ratio >= ratio_threshold:
347
- cut = min(len(b_full), max(int(len(prompt) * 0.9), len(a)))
348
- new_text = b_full[cut:].lstrip(" \n:-")
349
- if len(new_text) >= 3:
350
- return new_text
351
- placeholders = ["enter analysis", "enter your analysis", "enter analysis here", "please enter analysis"]
352
- low = b_full.strip().lower()
353
- for ph in placeholders:
354
- if low.startswith(ph):
355
- return b_full[len(ph):].lstrip(" \n:-")
356
- return text
357
-
358
- # reset on URL change
359
- current_url = st.session_state.get("url_input", "")
360
- if current_url != st.session_state.get("last_url_value"):
361
- st.session_state["videos"] = ""
362
- st.session_state["last_loaded_path"] = ""
363
- st.session_state["uploaded_file"] = None
364
- st.session_state["processed_file"] = None
365
- st.session_state["analysis_out"] = ""
366
- st.session_state["last_error"] = ""
367
- st.session_state["file_hash"] = None
368
- for f in glob(str(DATA_DIR / "*")):
369
- try:
370
- os.remove(f)
371
- except Exception:
372
- pass
373
- st.session_state["last_url_value"] = current_url
374
-
375
- st.sidebar.header("Video Input")
376
- st.sidebar.text_input("Video URL", key="url_input", placeholder="https://", value=st.session_state.get("url_input", ""))
377
-
378
- settings_exp = st.sidebar.expander("Settings", expanded=False)
379
-
380
- # compute initial index safely
381
- initial_index = 0
382
- try:
383
- cur = st.session_state.get("model_select", st.session_state.get("preferred_model"))
384
- initial_index = MODEL_OPTIONS.index(cur) if cur in MODEL_OPTIONS else MODEL_OPTIONS.index(st.session_state.get("preferred_model"))
385
- except Exception:
386
- initial_index = 0
387
-
388
- # let the widget own st.session_state["model_select"]; do not assign into it later
389
- with settings_exp:
390
- st.selectbox("Gemini model", MODEL_OPTIONS, index=initial_index, key="model_select")
391
- if st.session_state.get("model_select") == "custom":
392
- st.text_input("Custom model name", value=st.session_state.get("custom_model", ""), key="custom_model")
393
- model_input_value = (st.session_state.get("custom_model") or st.session_state.get("model_select")).strip()
394
-
395
- st.text_input("Google API Key", key="api_key_input", value=st.session_state.get("api_key", ""), type="password")
396
-
397
- default_prompt = (
398
- "Watch the video and provide a detailed behavioral report focusing on human actions, interactions, posture, movement, and apparent intent. Keep language professional. Include a list of observations for notable events."
399
- )
400
- st.text_area("Enter analysis prompt", value=st.session_state.get("analysis_prompt", default_prompt), height=140, key="analysis_prompt")
401
-
402
- st.text_input("Video Password (if needed)", key="video_password_input", placeholder="password", type="password")
403
-
404
- st.number_input("Processing timeout (s)", min_value=60, max_value=3600, value=st.session_state.get("processing_timeout", 900), step=30, key="processing_timeout_input")
405
- st.number_input("Generation timeout (s)", min_value=30, max_value=1800, value=st.session_state.get("generation_timeout", 300), step=10, key="generation_timeout_input")
406
- st.number_input("Compression threshold (MB)", min_value=10, max_value=2000, value=st.session_state.get("compression_threshold_mb", 200), step=10, key="compression_threshold_input")
407
-
408
- key_source = "session" if st.session_state.get("api_key") else ".env" if os.getenv("GOOGLE_API_KEY") else "none"
409
- st.caption(f"Using API key from: {key_source}")
410
-
411
- if not get_effective_api_key():
412
- st.warning("No Google API key provided; upload/generation disabled.", icon="⚠️")
413
-
414
  col1, col2 = st.columns([1, 3])
415
  with col1:
416
- generate_now = st.button("Generate the story", type="primary", disabled=not bool(get_effective_api_key()), key="gen_button")
417
  with col2:
418
- pass
419
 
420
- if st.sidebar.button("Load Video", use_container_width=True, key="load_video_btn"):
421
  try:
422
- vpw = st.session_state.get("video_password", "")
423
- path = download_video_ytdlp(st.session_state.get("url_input", ""), str(DATA_DIR), vpw)
424
  st.session_state["videos"] = path
425
  st.session_state["last_loaded_path"] = path
426
  st.session_state.pop("uploaded_file", None)
@@ -439,26 +450,15 @@ if st.session_state["videos"]:
439
  st.sidebar.write("Couldn't preview video")
440
 
441
  with st.sidebar.expander("Options", expanded=False):
442
- loop_checkbox = st.checkbox("Enable Loop", value=st.session_state.get("loop_video", False), key="loop_checkbox")
443
- st.session_state["loop_video"] = st.session_state.get("loop_checkbox", st.session_state.get("loop_video", False))
444
-
445
- if st.button("Clear Video(s)", key="clear_videos_btn"):
446
- st.session_state["videos"] = ""
447
- st.session_state["last_loaded_path"] = ""
448
- st.session_state["uploaded_file"] = None
449
- st.session_state["processed_file"] = None
450
- st.session_state["analysis_out"] = ""
451
- st.session_state["last_error"] = ""
452
- st.session_state["file_hash"] = None
453
- for f in glob(str(DATA_DIR / "*")):
454
- try:
455
- os.remove(f)
456
- except Exception:
457
- pass
458
 
459
  try:
460
  with open(st.session_state["videos"], "rb") as vf:
461
- st.download_button("Download Video", data=vf, file_name=sanitize_filename(st.session_state["videos"]), mime="video/mp4", use_container_width=True, key="download_video_btn")
462
  except Exception:
463
  st.sidebar.error("Failed to prepare download")
464
 
@@ -466,14 +466,12 @@ if st.session_state["videos"]:
466
  try:
467
  file_size_mb = os.path.getsize(st.session_state["videos"]) / (1024 * 1024)
468
  st.sidebar.caption(f"File size: {file_size_mb:.1f} MB")
469
- if file_size_mb > st.session_state.get("compression_threshold_mb", 200):
470
- st.sidebar.warning("Large file detected — it will be compressed automatically before upload.", icon="⚠️")
471
- else:
472
- st.sidebar.info("File ≤ threshold — will be uploaded unchanged.")
473
  except Exception:
474
  pass
475
 
476
- # generation flow
477
  if generate_now and not st.session_state.get("busy"):
478
  if not st.session_state.get("videos"):
479
  st.error("No video loaded. Use 'Load Video' in the sidebar.")
@@ -485,14 +483,15 @@ if generate_now and not st.session_state.get("busy"):
485
  try:
486
  st.session_state["busy"] = True
487
  try:
488
- if HAS_GENAI and genai is not None and hasattr(genai, "configure"):
489
  genai.configure(api_key=key_to_use)
490
  except Exception:
491
- logger.exception("genai configure failed")
492
 
493
- model_id = model_input_value or st.session_state.get("preferred_model") or "gemini-2.0-flash-lite"
494
  if st.session_state.get("last_model") != model_id:
495
  st.session_state["last_model"] = ""
 
496
 
497
  processed = st.session_state.get("processed_file")
498
  current_path = st.session_state.get("videos")
@@ -506,25 +505,13 @@ if generate_now and not st.session_state.get("busy"):
506
  reupload_needed = False
507
 
508
  if reupload_needed:
509
- if not HAS_GENAI or upload_file is None:
510
- raise RuntimeError("google.generativeai SDK or upload support unavailable; cannot upload video. Use SDK with upload_file support.")
511
  local_path = current_path
512
- try:
513
- file_size_mb = os.path.getsize(local_path) / (1024 * 1024)
514
- except Exception:
515
- file_size_mb = None
516
-
517
- compressed = False
518
- upload_path = local_path
519
- threshold_mb = st.session_state.get("compression_threshold_mb", 200)
520
- if file_size_mb is not None and file_size_mb > threshold_mb:
521
- compressed_path = str(Path(local_path).with_name(Path(local_path).stem + "_compressed.mp4"))
522
- with st.spinner("Compressing video before upload..."):
523
- upload_path = compress_video(local_path, compressed_path, crf=28, preset="fast")
524
- if upload_path != local_path:
525
- compressed = True
526
 
527
  with st.spinner(f"Uploading video{' (compressed)' if compressed else ''}..."):
 
528
  try:
529
  uploaded = upload_video_sdk(upload_path)
530
  except Exception as e:
@@ -535,13 +522,7 @@ if generate_now and not st.session_state.get("busy"):
535
  try:
536
  processing_placeholder = st.empty()
537
  processing_bar = processing_placeholder.progress(0)
538
- def processing_cb(pct, elapsed, state):
539
- try:
540
- processing_bar.progress(min(100, int(pct)))
541
- processing_placeholder.caption(f"State: {state} — elapsed: {elapsed}s")
542
- except Exception:
543
- pass
544
- processed = wait_for_processed(uploaded, timeout=st.session_state.get("processing_timeout", 900), progress_callback=processing_cb)
545
  processing_bar.progress(100)
546
  processing_placeholder.success("Processing complete")
547
  except Exception as e:
@@ -554,28 +535,51 @@ if generate_now and not st.session_state.get("busy"):
554
  st.session_state["last_loaded_path"] = current_path
555
  st.session_state["file_hash"] = current_hash
556
 
557
- prompt_text = (st.session_state.get("analysis_prompt", "") or "").strip() or default_prompt
558
  out = ""
559
  model_used = model_id
560
  max_tokens = 2048 if "2.5" in model_used else 1024
561
  est_tokens = max_tokens
562
 
563
- try:
564
- gen_progress_placeholder = st.empty()
565
- gen_status = gen_progress_placeholder.text("Starting generation...")
566
- start_gen = time.time()
567
- def gen_progress_cb(stage, elapsed, info):
568
- try:
569
- gen_status.text(f"Stage: {stage} elapsed: {elapsed}s — {info}")
570
- except Exception:
571
- pass
572
- out = generate_via_responses_api(prompt_text, st.session_state.get("processed_file"), model_used, max_tokens=max_tokens, timeout=st.session_state.get("generation_timeout", 300), progress_callback=gen_progress_cb)
573
- gen_progress_placeholder.text(f"Generation complete in {int(time.time()-start_gen)}s")
574
- except Exception as e:
575
- tb = traceback.format_exc()
576
- st.session_state["last_error"] = f"Responses API error: {e}\n\nTraceback:\n{tb}"
577
- st.error("An error occurred while generating the story. You can try Generate again; the uploaded video will be reused.")
578
- out = ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
579
 
580
  if out:
581
  out = remove_prompt_echo(prompt_text, out)
@@ -598,7 +602,7 @@ if generate_now and not st.session_state.get("busy"):
598
 
599
  except Exception as e:
600
  tb = traceback.format_exc()
601
- st.session_state["last_error"] = f"{e}\n\nTraceback:\n{tb}"
602
  st.error("An error occurred while generating the story. You can try Generate again; the uploaded video will be reused.")
603
  finally:
604
  st.session_state["busy"] = False
 
6
  import traceback
7
  from glob import glob
8
  from pathlib import Path
9
+ from difflib import SequenceMatcher
10
+ import concurrent.futures
11
+ import json
12
+
13
  import yt_dlp
14
  import ffmpeg
15
  import streamlit as st
16
  from dotenv import load_dotenv
 
17
 
18
+ load_dotenv()
19
+
20
+ # Optional phi integration (Agent + Gemini wrapper)
21
  try:
22
+ from phi.agent import Agent
23
+ from phi.model.google import Gemini
24
+ from phi.tools.duckduckgo import DuckDuckGo
25
+ HAS_PHI = True
26
+ except Exception:
27
+ Agent = Gemini = DuckDuckGo = None
28
+ HAS_PHI = False
29
+
30
+ # google.generativeai SDK
31
+ try:
32
+ import google.generativeai as genai
33
+ from google.generativeai import upload_file, get_file # type: ignore
34
  HAS_GENAI = True
35
  except Exception:
36
  genai = None
37
+ upload_file = get_file = None
 
 
38
  HAS_GENAI = False
39
 
 
 
 
 
 
40
  st.set_page_config(page_title="Generate the story of videos", layout="wide")
41
  DATA_DIR = Path("./data")
42
  DATA_DIR.mkdir(exist_ok=True)
43
 
44
+ # ---- Defaults & constants ----
45
+ MODEL_OPTIONS = [
46
+ "gemini-2.5-flash",
47
+ "gemini-2.5-flash-lite",
48
+ "gemini-2.0-flash",
49
+ "gemini-2.0-flash-lite",
50
+ "custom",
51
+ ]
52
+
53
+ DEFAULT_MODEL = "gemini-2.0-flash-lite"
54
+ DEFAULT_PROMPT = (
55
+ "Watch the video and provide a detailed behavioral report focusing on human actions, interactions, posture, movement, and apparent intent. "
56
+ "Keep language professional. Include a list of observations for notable events."
57
+ )
58
+
59
+ # ---- Session defaults ----
60
  st.session_state.setdefault("videos", "")
61
  st.session_state.setdefault("loop_video", False)
62
  st.session_state.setdefault("uploaded_file", None)
 
68
  st.session_state.setdefault("file_hash", None)
69
  st.session_state.setdefault("api_key", os.getenv("GOOGLE_API_KEY", ""))
70
  st.session_state.setdefault("last_model", "")
71
+ st.session_state.setdefault("upload_progress", {"uploaded": 0, "total": 0})
72
  st.session_state.setdefault("last_url_value", "")
73
+ st.session_state.setdefault("processing_timeout", 900) # seconds
74
+ st.session_state.setdefault("generation_timeout", 300) # seconds
75
+ st.session_state.setdefault("compress_threshold_mb", 200) # optional compression default
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
 
77
+ # ---- Helpers ----
78
  def sanitize_filename(path_str: str):
79
  name = Path(path_str).name
80
  return name.lower().translate(str.maketrans("", "", string.punctuation)).replace(" ", "_")
 
97
  pass
98
  return target_path
99
 
100
def compress_video(input_path: str, target_path: str, crf: int = 28, preset: str = "fast"):
    """Re-encode *input_path* to H.264 at *target_path*; return the original path on failure."""
    try:
        source = ffmpeg.input(input_path)
        encoded = source.output(target_path, vcodec="libx264", crf=crf, preset=preset)
        encoded.run(overwrite_output=True, quiet=True)
        return target_path
    except Exception:
        # Best-effort compression: any ffmpeg error hands the untouched file back.
        return input_path
108
 
109
  def download_video_ytdlp(url: str, save_dir: str, video_password: str = None) -> str:
 
140
  if not key:
141
  return False
142
  try:
143
+ genai.configure(api_key=key)
 
144
  except Exception:
145
+ pass
146
  return True
147
 
148
+ # ---- Agent management (reuse) ----
149
+ _agent = None
150
+ def maybe_create_agent(model_id: str):
151
+ global _agent
152
+ key = get_effective_api_key()
153
+ if not (HAS_PHI and HAS_GENAI and key):
154
+ _agent = None
155
+ return None
156
+ if _agent and st.session_state.get("last_model") == model_id:
157
+ return _agent
158
+ try:
159
+ genai.configure(api_key=key)
160
+ _agent = Agent(name="Video AI summarizer", model=Gemini(id=model_id), tools=[DuckDuckGo()], markdown=True)
161
+ st.session_state["last_model"] = model_id
162
+ except Exception:
163
+ _agent = None
164
+ return _agent
165
+
166
+ def clear_all_video_state():
167
+ st.session_state.pop("uploaded_file", None)
168
+ st.session_state.pop("processed_file", None)
169
+ st.session_state["videos"] = ""
170
+ st.session_state["last_loaded_path"] = ""
171
+ st.session_state["analysis_out"] = ""
172
+ st.session_state["last_error"] = ""
173
+ st.session_state["file_hash"] = None
174
+ for f in glob(str(DATA_DIR / "*")):
175
+ try:
176
+ os.remove(f)
177
+ except Exception:
178
+ pass
179
+
180
+ # Reset when URL changes
181
+ current_url = st.session_state.get("url", "")
182
+ if current_url != st.session_state.get("last_url_value"):
183
+ clear_all_video_state()
184
+ st.session_state["last_url_value"] = current_url
185
+
186
+ # ---- Sidebar UI ----
187
+ st.sidebar.header("Video Input")
188
+ st.sidebar.text_input("Video URL", key="url", placeholder="https://")
189
+
190
+ settings_exp = st.sidebar.expander("Settings", expanded=False)
191
+ model_choice = settings_exp.selectbox("Select model", options=MODEL_OPTIONS, index=MODEL_OPTIONS.index(DEFAULT_MODEL) if DEFAULT_MODEL in MODEL_OPTIONS else 0)
192
+ if model_choice == "custom":
193
+ model_input = settings_exp.text_input("Custom model id", value=DEFAULT_MODEL, key="model_input")
194
+ model_selected = model_input.strip() or DEFAULT_MODEL
195
+ else:
196
+ st.session_state["model_input"] = model_choice
197
+ model_selected = model_choice
198
+
199
+ settings_exp.text_input("Google API Key", key="api_key", value=os.getenv("GOOGLE_API_KEY", ""), type="password")
200
+ analysis_prompt = settings_exp.text_area("Analysis prompt", value=DEFAULT_PROMPT, height=140)
201
+ settings_exp.text_input("Video Password (if needed)", key="video-password", placeholder="password", type="password")
202
+
203
+ settings_exp.number_input(
204
+ "Processing timeout (s)", min_value=60, max_value=3600,
205
+ value=st.session_state.get("processing_timeout", 900), step=30,
206
+ key="processing_timeout",
207
+ )
208
+ settings_exp.number_input(
209
+ "Generation timeout (s)", min_value=30, max_value=1800,
210
+ value=st.session_state.get("generation_timeout", 300), step=10,
211
+ key="generation_timeout",
212
+ )
213
+ settings_exp.number_input(
214
+ "Optional compression threshold (MB)", min_value=10, max_value=2000,
215
+ value=st.session_state.get("compress_threshold_mb", 200), step=10,
216
+ key="compress_threshold_mb",
217
+ )
218
+
219
+ key_source = "session" if st.session_state.get("api_key") else ".env" if os.getenv("GOOGLE_API_KEY") else "none"
220
+ settings_exp.caption(f"Using API key from: **{key_source}**")
221
+ if not get_effective_api_key():
222
+ settings_exp.warning("No Google API key provided; upload/generation disabled.", icon="⚠️")
223
+
224
+ safety_settings = [
225
+ {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "OFF"},
226
+ {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "OFF"},
227
+ {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "OFF"},
228
+ {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "OFF"},
229
+ ]
230
+
231
+ # ---- Upload & processing helpers ----
232
  def upload_video_sdk(filepath: str):
233
  key = get_effective_api_key()
234
  if not key:
235
  raise RuntimeError("No API key provided")
236
  if not HAS_GENAI or upload_file is None:
237
  raise RuntimeError("google.generativeai SDK not available; cannot upload")
238
+ genai.configure(api_key=key)
239
+ return upload_file(filepath)
240
+
241
+ def wait_for_processed(file_obj, timeout: int = None):
242
+ """
243
+ Poll get_file until file is no longer PROCESSING.
244
+ Retries get_file on transient errors with exponential backoff.
245
+ """
 
 
 
 
246
  if timeout is None:
247
  timeout = st.session_state.get("processing_timeout", 900)
248
  if not HAS_GENAI or get_file is None:
 
261
  time.sleep(backoff)
262
  backoff = min(backoff * 2, 8.0)
263
  continue
264
+
265
  state = getattr(obj, "state", None)
266
+ if not state or getattr(state, "name", None) != "PROCESSING":
 
 
 
 
 
 
 
 
267
  return obj
268
+
269
  if time.time() - start > timeout:
270
  raise TimeoutError(f"File processing timed out after {int(time.time() - start)}s")
271
  time.sleep(backoff)
272
  backoff = min(backoff * 2, 8.0)
273
 
274
def remove_prompt_echo(prompt: str, text: str, check_len: int = 600, ratio_threshold: float = 0.68):
    """Strip a leading echo of *prompt* (or a known placeholder) from a model reply.

    The prompt is fuzzily compared against the first *check_len* characters
    of *text*; when the similarity reaches *ratio_threshold* the echoed
    prefix is cut off. Otherwise a short list of placeholder prefixes is
    removed. Returns *text* unchanged when nothing matches.
    """
    if not prompt or not text:
        return text
    normalized_prompt = " ".join(prompt.strip().lower().split())
    stripped_text = text.strip()
    normalized_head = " ".join(stripped_text[:check_len].lower().split())
    similarity = SequenceMatcher(None, normalized_prompt, normalized_head).ratio()
    if similarity >= ratio_threshold:
        cut = min(len(stripped_text), max(int(len(prompt) * 0.9), len(normalized_prompt)))
        remainder = stripped_text[cut:].lstrip(" \n:-")
        if len(remainder) >= 3:
            return remainder
    lowered = stripped_text.strip().lower()
    for placeholder in ("enter analysis", "enter your analysis", "enter analysis here", "please enter analysis"):
        if lowered.startswith(placeholder):
            return stripped_text[len(placeholder):].lstrip(" \n:-")
    return text
292
+
293
def compress_video_if_large(local_path: str, threshold_mb: int = 200):
    """Compress *local_path* when it exceeds *threshold_mb* megabytes.

    Returns a ``(path, compressed)`` tuple: the path to upload and a flag
    telling whether compression actually produced a new file. On any
    failure the original path is returned with ``compressed=False`` and
    the error is recorded in ``st.session_state["last_error"]``.
    """
    try:
        file_size_mb = os.path.getsize(local_path) / (1024 * 1024)
    except Exception as e:
        st.session_state["last_error"] = f"Failed to stat file before compression: {e}"
        return local_path, False

    if file_size_mb <= threshold_mb:
        return local_path, False

    compressed_path = str(Path(local_path).with_name(Path(local_path).stem + "_compressed.mp4"))
    try:
        result = compress_video(local_path, compressed_path, crf=28, preset="fast")
        # BUG FIX: compress_video swallows ffmpeg errors and returns the input
        # path, which always exists — so the old check reported success for a
        # failed compression. Only flag True when a distinct output file exists.
        if result and result != local_path and os.path.exists(result):
            return result, True
        return local_path, False
    except Exception as e:
        st.session_state["last_error"] = f"Video compression failed: {e}\n{traceback.format_exc()}"
        return local_path, False
312
+
313
+ # ---- Robust Responses API caller adapted for varying model versions ----
314
def generate_via_responses_api(prompt_text: str, processed, model_used: str, max_tokens: int = 1024, timeout: int = 300):
    """Ask the Gemini Responses API to describe the uploaded video.

    Tries two payload shapes (``messages`` vs ``input``) because SDK
    releases differ, retrying transient failures with exponential backoff
    until *timeout* seconds have elapsed.

    Raises RuntimeError when the API key, SDK, or uploaded file name is
    missing; re-raises non-transient API errors; raises TimeoutError when
    the deadline passes.
    """
    key = get_effective_api_key()
    if not key:
        raise RuntimeError("No API key provided")
    if not HAS_GENAI or genai is None:
        raise RuntimeError("Responses API not available; install google.generativeai SDK.")
    genai.configure(api_key=key)
    fname = file_name_or_id(processed)
    if not fname:
        raise RuntimeError("Uploaded file missing name/id")

    system_msg = {"role": "system", "content": prompt_text}
    user_msg = {"role": "user", "content": "Please summarize the attached video."}

    # Some model versions and SDK releases expect messages, some older ones expect input with files.
    call_variants = [
        {"messages": [system_msg, user_msg], "files": [{"name": fname}], "safety_settings": safety_settings, "max_output_tokens": max_tokens},
        {"input": [{"text": prompt_text, "files": [{"name": fname}]}], "safety_settings": safety_settings, "max_output_tokens": max_tokens},
    ]

    last_exc = None
    start = time.time()
    backoff = 1.0
    while True:
        for payload in call_variants:
            try:
                # NOTE(review): `genai.responses` is not exposed by every
                # google-generativeai release — confirm the installed SDK has
                # it; otherwise this raises AttributeError, which the handler
                # below treats as non-transient and re-raises.
                response = genai.responses.generate(model=model_used, **payload)
                return _normalize_genai_response(response)
            except Exception as e:
                last_exc = e
                msg = str(e).lower()
                # Retryability is decided by substring match on the error text.
                if any(k in msg for k in ("internal", "unavailable", "deadlineexceeded", "deadline exceeded", "timeout", "rate limit")):
                    pass  # transient: fall through to the next variant / backoff sleep
                else:
                    raise
        if time.time() - start > timeout:
            raise TimeoutError(f"Responses.generate timed out after {timeout}s: last error: {last_exc}")
        time.sleep(backoff)
        backoff = min(backoff * 2, 8.0)
353
+
354
  def _normalize_genai_response(response):
355
+ outputs = []
356
  if response is None:
357
  return ""
358
+
359
+ if not isinstance(response, dict):
360
+ try:
361
  response = json.loads(str(response))
362
+ except Exception:
363
+ pass
364
+
365
  candidate_lists = []
366
  if isinstance(response, dict):
367
  for key in ("output", "candidates", "items", "responses", "choices"):
 
373
  if isinstance(v, list) and v:
374
  candidate_lists.append(v)
375
  break
376
+
377
  text_pieces = []
378
  for lst in candidate_lists:
379
  for item in lst:
 
403
  text_pieces.append(str(t).strip())
404
  except Exception:
405
  pass
406
+
407
  if not text_pieces and isinstance(response, dict):
408
  for k in ("text", "message", "output_text"):
409
  v = response.get(k)
410
  if v:
411
  text_pieces.append(str(v).strip())
412
  break
413
+
414
  seen = set()
415
  filtered = []
416
  for t in text_pieces:
 
421
  seen.add(t)
422
  return "\n\n".join(filtered).strip()
423
 
424
+ # ---- Layout ----
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
425
  col1, col2 = st.columns([1, 3])
426
  with col1:
427
+ generate_now = st.button("Generate the story", type="primary", disabled=not bool(get_effective_api_key()))
428
  with col2:
429
+ st.write("") # placeholder
430
 
431
+ if st.sidebar.button("Load Video", use_container_width=True):
432
  try:
433
+ vpw = st.session_state.get("video-password", "")
434
+ path = download_video_ytdlp(st.session_state.get("url", ""), str(DATA_DIR), vpw)
435
  st.session_state["videos"] = path
436
  st.session_state["last_loaded_path"] = path
437
  st.session_state.pop("uploaded_file", None)
 
450
  st.sidebar.write("Couldn't preview video")
451
 
452
  with st.sidebar.expander("Options", expanded=False):
453
+ loop_checkbox = st.checkbox("Enable Loop", value=st.session_state.get("loop_video", False))
454
+ st.session_state["loop_video"] = loop_checkbox
455
+
456
+ if st.button("Clear Video(s)"):
457
+ clear_all_video_state()
 
 
 
 
 
 
 
 
 
 
 
458
 
459
# Offer the currently loaded video file for download from the sidebar.
try:
    video_path = st.session_state["videos"]
    with open(video_path, "rb") as vf:
        st.download_button("Download Video", data=vf, file_name=sanitize_filename(video_path), mime="video/mp4", use_container_width=True)
except Exception:
    st.sidebar.error("Failed to prepare download")
464
 
 
466
# Show the local file size and warn when it exceeds the auto-compression
# threshold.  Best-effort UI hint only — never block the app on a stat failure.
try:
    file_size_mb = os.path.getsize(st.session_state["videos"]) / (1024 * 1024)
    st.sidebar.caption(f"File size: {file_size_mb:.1f} MB")
    # BUG FIX: the session defaults set "compression_threshold_mb", but this
    # code read the misspelled "compress_threshold_mb", so the configured
    # threshold was ignored and the hard-coded 200 always applied.
    threshold_mb = st.session_state.get("compression_threshold_mb", 200)
    if file_size_mb > threshold_mb:
        st.sidebar.warning(f"Large file detected — it will be compressed automatically before upload (>{threshold_mb} MB).", icon="⚠️")
except Exception:
    pass
473
 
474
+ # ---- Main generation flow ----
475
  if generate_now and not st.session_state.get("busy"):
476
  if not st.session_state.get("videos"):
477
  st.error("No video loaded. Use 'Load Video' in the sidebar.")
 
483
  try:
484
  st.session_state["busy"] = True
485
  try:
486
+ if HAS_GENAI and genai is not None:
487
  genai.configure(api_key=key_to_use)
488
  except Exception:
489
+ pass
490
 
491
+ model_id = (st.session_state.get("model_input") or model_selected or DEFAULT_MODEL).strip()
492
  if st.session_state.get("last_model") != model_id:
493
  st.session_state["last_model"] = ""
494
+ maybe_create_agent(model_id)
495
 
496
  processed = st.session_state.get("processed_file")
497
  current_path = st.session_state.get("videos")
 
505
  reupload_needed = False
506
 
507
  if reupload_needed:
508
+ if not HAS_GENAI:
509
+ raise RuntimeError("google.generativeai SDK not available; install it.")
510
  local_path = current_path
511
+ upload_path, compressed = compress_video_if_large(local_path, threshold_mb=st.session_state.get("compress_threshold_mb", 200))
 
 
 
 
 
 
 
 
 
 
 
 
 
512
 
513
  with st.spinner(f"Uploading video{' (compressed)' if compressed else ''}..."):
514
+ progress_placeholder = st.empty()
515
  try:
516
  uploaded = upload_video_sdk(upload_path)
517
  except Exception as e:
 
522
  try:
523
  processing_placeholder = st.empty()
524
  processing_bar = processing_placeholder.progress(0)
525
+ processed = wait_for_processed(uploaded, timeout=st.session_state.get("processing_timeout", 900))
 
 
 
 
 
 
526
  processing_bar.progress(100)
527
  processing_placeholder.success("Processing complete")
528
  except Exception as e:
 
535
  st.session_state["last_loaded_path"] = current_path
536
  st.session_state["file_hash"] = current_hash
537
 
538
+ prompt_text = (analysis_prompt.strip() or DEFAULT_PROMPT).strip()
539
  out = ""
540
  model_used = model_id
541
  max_tokens = 2048 if "2.5" in model_used else 1024
542
  est_tokens = max_tokens
543
 
544
+ # Try Agent first, fallback to Responses API
545
+ agent = maybe_create_agent(model_used)
546
+ debug_info = {"agent_attempted": False, "agent_ok": False, "agent_error": None, "agent_response_has_text": False}
547
+ if agent:
548
+ debug_info["agent_attempted"] = True
549
+ try:
550
+ with st.spinner("Generating description via Agent..."):
551
+ if not processed:
552
+ raise RuntimeError("Processed file missing for agent generation")
553
+ agent_response = agent.run(prompt_text, videos=[processed], safety_settings=safety_settings)
554
+ agent_text = getattr(agent_response, "content", None) or getattr(agent_response, "outputText", None) or None
555
+ if not agent_text:
556
+ try:
557
+ if isinstance(agent_response, dict):
558
+ for k in ("content", "outputText", "text", "message"):
559
+ if k in agent_response and agent_response[k]:
560
+ agent_text = agent_response[k]
561
+ break
562
+ except Exception:
563
+ pass
564
+ if agent_text and str(agent_text).strip():
565
+ out = str(agent_text).strip()
566
+ debug_info["agent_ok"] = True
567
+ debug_info["agent_response_has_text"] = True
568
+ else:
569
+ debug_info["agent_ok"] = False
570
+ except Exception as ae:
571
+ debug_info["agent_error"] = f"{ae}"
572
+ debug_info["agent_traceback"] = traceback.format_exc()
573
+
574
+ if not out:
575
+ try:
576
+ with st.spinner("Generating description via Responses API..."):
577
+ out = generate_via_responses_api(prompt_text, processed, model_used, max_tokens=max_tokens, timeout=st.session_state.get("generation_timeout", 300))
578
+ except Exception as e:
579
+ tb = traceback.format_exc()
580
+ st.session_state["last_error"] = f"Responses API error: {e}\n\nDebug: {debug_info}\n\nTraceback:\n{tb}"
581
+ st.error("An error occurred while generating the story. You can try Generate again; the uploaded video will be reused.")
582
+ out = ""
583
 
584
  if out:
585
  out = remove_prompt_echo(prompt_text, out)
 
602
 
603
  except Exception as e:
604
  tb = traceback.format_exc()
605
+ st.session_state["last_error"] = f"{e}\n\nDebug: {locals().get('debug_info', {})}\n\nTraceback:\n{tb}"
606
  st.error("An error occurred while generating the story. You can try Generate again; the uploaded video will be reused.")
607
  finally:
608
  st.session_state["busy"] = False