CB commited on
Commit
d9fde4e
·
verified ·
1 Parent(s): 8fea353

Update streamlit_app.py

Browse files
Files changed (1) hide show
  1. streamlit_app.py +204 -335
streamlit_app.py CHANGED
@@ -7,60 +7,37 @@ import traceback
7
  from glob import glob
8
  from pathlib import Path
9
  from difflib import SequenceMatcher
10
- import json
11
- import logging
12
 
13
  import yt_dlp
14
- import ffmpeg # ffmpeg-python
15
  import streamlit as st
16
  from dotenv import load_dotenv
17
 
18
  load_dotenv()
19
 
20
- # Optional PHI integration
21
  try:
22
  from phi.agent import Agent
23
  from phi.model.google import Gemini
24
  from phi.tools.duckduckgo import DuckDuckGo
25
-
26
  HAS_PHI = True
27
  except Exception:
28
  Agent = Gemini = DuckDuckGo = None
29
  HAS_PHI = False
30
 
31
- # google.generativeai SDK
32
  try:
33
  import google.generativeai as genai
34
- from google.generativeai import upload_file, get_file
35
-
36
  HAS_GENAI = True
37
  except Exception:
38
  genai = None
39
  upload_file = get_file = None
40
  HAS_GENAI = False
41
 
42
- logging.basicConfig(level=logging.INFO)
43
-
44
  st.set_page_config(page_title="Generate the story of videos", layout="wide")
45
  DATA_DIR = Path("./data")
46
  DATA_DIR.mkdir(exist_ok=True)
47
 
48
- # ---- Defaults & constants ----
49
- MODEL_OPTIONS = [
50
- "gemini-2.5-flash",
51
- "gemini-2.5-flash-lite",
52
- "gemini-2.0-flash",
53
- "gemini-2.0-flash-lite",
54
- "custom",
55
- ]
56
- DEFAULT_MODEL = "gemini-2.0-flash-lite"
57
- DEFAULT_PROMPT = (
58
- "Watch the video and provide a detailed behavioral report focusing on human actions, interactions, posture, movement, and apparent intent. "
59
- "Keep language professional. Include a list of observations for notable events."
60
- )
61
-
62
- # ---- Session defaults ----
63
- st.session_state.setdefault("url", "")
64
  st.session_state.setdefault("videos", "")
65
  st.session_state.setdefault("loop_video", False)
66
  st.session_state.setdefault("uploaded_file", None)
@@ -70,59 +47,39 @@ st.session_state.setdefault("last_loaded_path", "")
70
  st.session_state.setdefault("analysis_out", "")
71
  st.session_state.setdefault("last_error", "")
72
  st.session_state.setdefault("file_hash", None)
 
73
  st.session_state.setdefault("api_key", os.getenv("GOOGLE_API_KEY", ""))
74
  st.session_state.setdefault("last_model", "")
75
  st.session_state.setdefault("upload_progress", {"uploaded": 0, "total": 0})
76
  st.session_state.setdefault("last_url_value", "")
77
- st.session_state.setdefault("processing_timeout", 900)
78
- st.session_state.setdefault("generation_timeout", 300)
79
- st.session_state.setdefault("compress_threshold_mb", 200)
80
 
81
- # ---- Helpers ----
82
  def sanitize_filename(path_str: str):
83
  name = Path(path_str).name
84
  return name.lower().translate(str.maketrans("", "", string.punctuation)).replace(" ", "_")
85
 
86
  def file_sha256(path: str, block_size: int = 65536) -> str:
87
- try:
88
- h = hashlib.sha256()
89
- with open(path, "rb") as f:
90
- for chunk in iter(lambda: f.read(block_size), b""):
91
- h.update(chunk)
92
- return h.hexdigest()
93
- except Exception:
94
- return None
95
 
96
  def convert_video_to_mp4(video_path: str) -> str:
97
  target_path = str(Path(video_path).with_suffix(".mp4"))
98
  if os.path.exists(target_path):
99
  return target_path
 
100
  try:
101
- ffmpeg.input(video_path).output(target_path).run(overwrite_output=True, quiet=True)
102
- except Exception as e:
103
- logging.exception("ffmpeg conversion failed")
104
- # If conversion fails, do not delete original; re-raise for caller to handle if needed
105
- raise
106
- # Only remove source if target exists and is non-empty
107
- if os.path.exists(target_path) and os.path.getsize(target_path) > 0:
108
- try:
109
- if str(Path(video_path).resolve()) != str(Path(target_path).resolve()):
110
- os.remove(video_path)
111
- except Exception:
112
- logging.exception("Failed to remove original video after conversion")
113
  return target_path
114
 
115
  def compress_video(input_path: str, target_path: str, crf: int = 28, preset: str = "fast"):
116
  try:
117
- ffmpeg.input(input_path).output(
118
- target_path, vcodec="libx264", crf=crf, preset=preset
119
- ).run(overwrite_output=True, quiet=True)
120
- if os.path.exists(target_path) and os.path.getsize(target_path) > 0:
121
- return target_path
122
- logging.warning("Compression completed but target missing or empty; returning input path")
123
- return input_path
124
  except Exception:
125
- logging.exception("Video compression failed")
126
  return input_path
127
 
128
  def download_video_ytdlp(url: str, save_dir: str, video_password: str = None) -> str:
@@ -131,46 +88,24 @@ def download_video_ytdlp(url: str, save_dir: str, video_password: str = None) ->
131
  outtmpl = str(Path(save_dir) / "%(id)s.%(ext)s")
132
  ydl_opts = {"outtmpl": outtmpl, "format": "best"}
133
  if video_password:
134
- # yt-dlp accepts 'videopassword' in options for password-protected videos
135
  ydl_opts["videopassword"] = video_password
136
  with yt_dlp.YoutubeDL(ydl_opts) as ydl:
137
  info = ydl.extract_info(url, download=True)
138
- # info may be a dict for single video or playlist; prefer single entry if present
139
- video_candidates = []
140
- if isinstance(info, dict):
141
- # playlist -> entries list
142
- entries = info.get("entries")
143
- if entries:
144
- # get last-downloaded entry (entries may be nested); map to filesystem files by ids
145
- for e in entries:
146
- if isinstance(e, dict) and e.get("id"):
147
- video_candidates.append(str(Path(save_dir) / f"{e['id']}.mp4"))
148
- else:
149
- vid = info.get("id")
150
- ext = info.get("ext") or "mp4"
151
- if vid:
152
- video_candidates.append(str(Path(save_dir) / f"{vid}.{ext}"))
153
- # fallback: pick most recent file in dir
154
- if not video_candidates:
155
- all_files = glob(os.path.join(save_dir, "*"))
156
- if not all_files:
157
- raise FileNotFoundError("Downloaded video not found")
158
- matches = sorted(all_files, key=os.path.getmtime, reverse=True)
159
- chosen = matches[0]
160
  else:
161
- # prefer existing files among candidates; pick first that exists, else fall back to newest
162
- existing = [p for p in video_candidates if os.path.exists(p)]
163
- chosen = existing[0] if existing else (sorted(glob(os.path.join(save_dir, "*")), key=os.path.getmtime, reverse=True)[0])
164
- # Ensure mp4 target
165
- final = convert_video_to_mp4(chosen)
166
- return final
167
 
168
  def file_name_or_id(file_obj):
169
  if file_obj is None:
170
  return None
171
  if isinstance(file_obj, dict):
172
  return file_obj.get("name") or file_obj.get("id")
173
- # common SDK wrappers may expose 'name', 'id', 'fileId'
174
  return getattr(file_obj, "name", None) or getattr(file_obj, "id", None) or getattr(file_obj, "fileId", None)
175
 
176
  def get_effective_api_key():
@@ -183,10 +118,9 @@ def configure_genai_if_needed():
183
  try:
184
  genai.configure(api_key=key)
185
  except Exception:
186
- logging.exception("genai.configure failed")
187
  return True
188
 
189
- # ---- Agent management (reuse) ----
190
  _agent = None
191
  def maybe_create_agent(model_id: str):
192
  global _agent
@@ -201,7 +135,6 @@ def maybe_create_agent(model_id: str):
201
  _agent = Agent(name="Video AI summarizer", model=Gemini(id=model_id), tools=[DuckDuckGo()], markdown=True)
202
  st.session_state["last_model"] = model_id
203
  except Exception:
204
- logging.exception("Failed to create PHI Agent")
205
  _agent = None
206
  return _agent
207
 
@@ -217,50 +150,31 @@ def clear_all_video_state():
217
  try:
218
  os.remove(f)
219
  except Exception:
220
- logging.exception("Failed to remove data file during clear_all_video_state")
221
 
222
- # Reset when URL changes
223
  current_url = st.session_state.get("url", "")
224
  if current_url != st.session_state.get("last_url_value"):
225
  clear_all_video_state()
226
  st.session_state["last_url_value"] = current_url
227
 
228
- # ---- Sidebar UI ----
229
  st.sidebar.header("Video Input")
230
  st.sidebar.text_input("Video URL", key="url", placeholder="https://")
231
 
232
  settings_exp = st.sidebar.expander("Settings", expanded=False)
233
- model_choice = settings_exp.selectbox("Select model", options=MODEL_OPTIONS, index=MODEL_OPTIONS.index(DEFAULT_MODEL) if DEFAULT_MODEL in MODEL_OPTIONS else 0)
234
- if model_choice == "custom":
235
- model_input = settings_exp.text_input("Custom model id", value=DEFAULT_MODEL, key="model_input")
236
- model_selected = model_input.strip() or DEFAULT_MODEL
237
- else:
238
- # keep model_input in session_state for later reads
239
- st.session_state["model_input"] = model_choice
240
- model_selected = model_choice
241
-
242
  settings_exp.text_input("Google API Key", key="api_key", value=os.getenv("GOOGLE_API_KEY", ""), type="password")
243
- analysis_prompt = settings_exp.text_area("Analysis prompt", value=DEFAULT_PROMPT, height=140)
244
- settings_exp.text_input("Video Password (if needed)", key="video-password", placeholder="password", type="password")
245
-
246
- settings_exp.number_input(
247
- "Processing timeout (s)", min_value=60, max_value=3600,
248
- value=st.session_state.get("processing_timeout", 900), step=30,
249
- key="processing_timeout",
250
- )
251
- settings_exp.number_input(
252
- "Generation timeout (s)", min_value=30, max_value=1800,
253
- value=st.session_state.get("generation_timeout", 300), step=10,
254
- key="generation_timeout",
255
- )
256
- settings_exp.number_input(
257
- "Optional compression threshold (MB)", min_value=10, max_value=2000,
258
- value=st.session_state.get("compress_threshold_mb", 200), step=10,
259
- key="compress_threshold_mb",
260
  )
 
 
 
261
 
 
262
  key_source = "session" if st.session_state.get("api_key") else ".env" if os.getenv("GOOGLE_API_KEY") else "none"
263
  settings_exp.caption(f"Using API key from: **{key_source}**")
 
264
  if not get_effective_api_key():
265
  settings_exp.warning("No Google API key provided; upload/generation disabled.", icon="⚠️")
266
 
@@ -271,7 +185,6 @@ safety_settings = [
271
  {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "OFF"},
272
  ]
273
 
274
- # ---- Upload & processing helpers ----
275
  def upload_video_sdk(filepath: str):
276
  key = get_effective_api_key()
277
  if not key:
@@ -279,12 +192,9 @@ def upload_video_sdk(filepath: str):
279
  if not HAS_GENAI or upload_file is None:
280
  raise RuntimeError("google.generativeai SDK not available; cannot upload")
281
  genai.configure(api_key=key)
282
- # upload_file may return object with id or name, keep as-is
283
  return upload_file(filepath)
284
 
285
- def wait_for_processed(file_obj, timeout: int = None):
286
- if timeout is None:
287
- timeout = st.session_state.get("processing_timeout", 900)
288
  if not HAS_GENAI or get_file is None:
289
  return file_obj
290
  start = time.time()
@@ -293,21 +203,12 @@ def wait_for_processed(file_obj, timeout: int = None):
293
  return file_obj
294
  backoff = 1.0
295
  while True:
296
- try:
297
- obj = get_file(name)
298
- except Exception as e:
299
- if time.time() - start > timeout:
300
- raise TimeoutError(f"Failed to fetch file status before timeout: {e}")
301
- time.sleep(backoff)
302
- backoff = min(backoff * 2, 8.0)
303
- continue
304
-
305
  state = getattr(obj, "state", None)
306
  if not state or getattr(state, "name", None) != "PROCESSING":
307
  return obj
308
-
309
  if time.time() - start > timeout:
310
- raise TimeoutError(f"File processing timed out after {int(time.time() - start)}s")
311
  time.sleep(backoff)
312
  backoff = min(backoff * 2, 8.0)
313
 
@@ -330,161 +231,11 @@ def remove_prompt_echo(prompt: str, text: str, check_len: int = 600, ratio_thres
330
  return b_full[len(ph):].lstrip(" \n:-")
331
  return text
332
 
333
- def compress_video_if_large(local_path: str, threshold_mb: int = 200):
334
- try:
335
- file_size_mb = os.path.getsize(local_path) / (1024 * 1024)
336
- except Exception as e:
337
- st.session_state["last_error"] = f"Failed to stat file before compression: {e}"
338
- return local_path, False
339
-
340
- if file_size_mb <= threshold_mb:
341
- return local_path, False
342
-
343
- # build compressed path reliably
344
- p = Path(local_path)
345
- compressed_name = f"{p.stem}_compressed.mp4"
346
- compressed_path = str(p.with_name(compressed_name))
347
-
348
- try:
349
- result = compress_video(local_path, compressed_path, crf=28, preset="fast")
350
- if result and os.path.exists(result) and os.path.getsize(result) > 0:
351
- return result, True
352
- return local_path, False
353
- except Exception as e:
354
- st.session_state["last_error"] = f"Video compression failed: {e}\n{traceback.format_exc()}"
355
- return local_path, False
356
-
357
- # ---- Responses API integration ----
358
- def generate_via_responses_api(prompt_text: str, processed, model_used: str, max_tokens: int = 1024, timeout: int = 300):
359
- key = get_effective_api_key()
360
- if not key:
361
- raise RuntimeError("No API key provided")
362
- if not HAS_GENAI or genai is None:
363
- raise RuntimeError("Responses API not available; install google.generativeai SDK.")
364
- genai.configure(api_key=key)
365
- fname = file_name_or_id(processed)
366
- if not fname:
367
- raise RuntimeError("Uploaded file missing name/id")
368
-
369
- system_msg = {"role": "system", "content": prompt_text}
370
- user_msg = {"role": "user", "content": "Please summarize the attached video."}
371
-
372
- call_variants = [
373
- {"messages": [system_msg, user_msg], "files": [{"name": fname}], "safety_settings": safety_settings, "max_output_tokens": max_tokens},
374
- {"input": [{"text": prompt_text, "files": [{"name": fname}]}], "safety_settings": safety_settings, "max_output_tokens": max_tokens},
375
- ]
376
-
377
- last_exc = None
378
- start = time.time()
379
- backoff = 1.0
380
- while True:
381
- for payload in call_variants:
382
- try:
383
- response = genai.responses.generate(model=model_used, **payload)
384
- return _normalize_genai_response(response)
385
- except Exception as e:
386
- last_exc = e
387
- msg = str(e).lower()
388
- # retry for transient/server errors
389
- if any(k in msg for k in ("internal", "unavailable", "deadlineexceeded", "deadline exceeded", "timeout", "rate limit")):
390
- logging.warning("Transient error from Responses API, will retry: %s", e)
391
- continue
392
- logging.exception("Non-retryable Responses API error")
393
- raise
394
- if time.time() - start > timeout:
395
- raise TimeoutError(f"Responses.generate timed out after {timeout}s: last error: {last_exc}")
396
- time.sleep(backoff)
397
- backoff = min(backoff * 2, 8.0)
398
-
399
- def _normalize_genai_response(response):
400
- outputs = []
401
- if response is None:
402
- return ""
403
-
404
- if not isinstance(response, dict):
405
- try:
406
- response = json.loads(str(response))
407
- except Exception:
408
- pass
409
-
410
- candidate_lists = []
411
- if isinstance(response, dict):
412
- for key in ("output", "candidates", "items", "responses", "choices"):
413
- val = response.get(key)
414
- if isinstance(val, list) and val:
415
- candidate_lists.append(val)
416
- if not candidate_lists and isinstance(response, dict):
417
- for v in response.values():
418
- if isinstance(v, list) and v:
419
- candidate_lists.append(v)
420
- break
421
-
422
- text_pieces = []
423
- for lst in candidate_lists:
424
- for item in lst:
425
- if not item:
426
- continue
427
- if isinstance(item, dict):
428
- for k in ("content", "text", "message", "output_text", "output"):
429
- t = item.get(k)
430
- if t:
431
- text_pieces.append(str(t).strip())
432
- break
433
- else:
434
- if "content" in item and isinstance(item["content"], list):
435
- for part in item["content"]:
436
- if isinstance(part, dict):
437
- t = part.get("text") or part.get("content")
438
- if t:
439
- text_pieces.append(str(t).strip())
440
- elif isinstance(part, str):
441
- text_pieces.append(part.strip())
442
- elif isinstance(item, str):
443
- text_pieces.append(item.strip())
444
- else:
445
- try:
446
- t = getattr(item, "text", None) or getattr(item, "content", None)
447
- if t:
448
- text_pieces.append(str(t).strip())
449
- except Exception:
450
- pass
451
-
452
- if not text_pieces and isinstance(response, dict):
453
- for k in ("text", "message", "output_text"):
454
- v = response.get(k)
455
- if v:
456
- text_pieces.append(str(v).strip())
457
- break
458
-
459
- seen = set()
460
- filtered = []
461
- for t in text_pieces:
462
- if not isinstance(t, str):
463
- continue
464
- if t and t not in seen:
465
- filtered.append(t)
466
- seen.add(t)
467
- return "\n\n".join(filtered).strip()
468
-
469
- # ---- small helpers for safer tracebacks ----
470
- def safe_traceback(max_chars=2000):
471
- tb = traceback.format_exc()
472
- return tb if len(tb) <= max_chars else tb[:max_chars] + "\n...[truncated]"
473
-
474
- def scrub_api_keys(s: str) -> str:
475
- if not s:
476
- return s
477
- key = get_effective_api_key()
478
- if key and key in s:
479
- return s.replace(key, "[REDACTED_API_KEY]")
480
- return s
481
-
482
- # ---- Layout ----
483
  col1, col2 = st.columns([1, 3])
484
  with col1:
485
  generate_now = st.button("Generate the story", type="primary", disabled=not bool(get_effective_api_key()))
486
  with col2:
487
- st.write("")
488
 
489
  if st.sidebar.button("Load Video", use_container_width=True):
490
  try:
@@ -499,7 +250,6 @@ if st.sidebar.button("Load Video", use_container_width=True):
499
  except Exception:
500
  st.session_state["file_hash"] = None
501
  except Exception as e:
502
- logging.exception("Failed to load video")
503
  st.sidebar.error(f"Failed to load video: {e}")
504
 
505
  if st.session_state["videos"]:
@@ -525,12 +275,12 @@ if st.session_state["videos"]:
525
  try:
526
  file_size_mb = os.path.getsize(st.session_state["videos"]) / (1024 * 1024)
527
  st.sidebar.caption(f"File size: {file_size_mb:.1f} MB")
528
- if file_size_mb > st.session_state.get("compress_threshold_mb", 200):
529
- st.sidebar.warning(f"Large file detected — it will be compressed automatically before upload (>{st.session_state.get('compress_threshold_mb')} MB).", icon="⚠️")
530
  except Exception:
531
  pass
532
 
533
- # ---- Main generation flow ----
534
  if generate_now and not st.session_state.get("busy"):
535
  if not st.session_state.get("videos"):
536
  st.error("No video loaded. Use 'Load Video' in the sidebar.")
@@ -545,9 +295,9 @@ if generate_now and not st.session_state.get("busy"):
545
  if HAS_GENAI and genai is not None:
546
  genai.configure(api_key=key_to_use)
547
  except Exception:
548
- logging.exception("genai.configure failed at start")
549
 
550
- model_id = (st.session_state.get("model_input") or model_selected or DEFAULT_MODEL).strip()
551
  if st.session_state.get("last_model") != model_id:
552
  st.session_state["last_model"] = ""
553
  maybe_create_agent(model_id)
@@ -559,57 +309,51 @@ if generate_now and not st.session_state.get("busy"):
559
  except Exception:
560
  current_hash = None
561
 
562
- # determine if reupload is needed: same local path + same hash + we have uploaded/processed file id
563
  reupload_needed = True
564
- uploaded_file = st.session_state.get("uploaded_file")
565
- uploaded_name = file_name_or_id(uploaded_file)
566
- if processed and st.session_state.get("last_loaded_path") == current_path and st.session_state.get("file_hash") == current_hash and uploaded_name:
567
  reupload_needed = False
568
 
569
  if reupload_needed:
570
  if not HAS_GENAI:
571
  raise RuntimeError("google.generativeai SDK not available; install it.")
572
  local_path = current_path
573
- upload_path, compressed = compress_video_if_large(local_path, threshold_mb=st.session_state.get("compress_threshold_mb", 200))
 
 
 
 
 
574
 
575
- with st.spinner(f"Uploading video{' (compressed)' if compressed else ''}..."):
 
576
  try:
577
- uploaded = upload_video_sdk(upload_path)
578
- except Exception as e:
579
- err = scrub_api_keys(f"Upload failed: {e}\n\nTraceback:\n{safe_traceback()}")
580
- st.session_state["last_error"] = err
581
- st.error("Upload failed. See Last Error for details.")
582
- raise
583
 
584
- try:
585
- processing_placeholder = st.empty()
586
- processing_bar = processing_placeholder.progress(0)
587
- start_time = time.time()
588
- processed = wait_for_processed(uploaded, timeout=st.session_state.get("processing_timeout", 900))
589
- # update progress once after wait (full incremental requires moving polling here)
590
- elapsed = time.time() - start_time
591
- timeout = st.session_state.get("processing_timeout", 900)
592
- pct = min(100, int((elapsed / timeout) * 100)) if timeout > 0 else 0
593
- processing_bar.progress(pct)
594
- processing_placeholder.success("Processing complete")
595
- except Exception as e:
596
- err = scrub_api_keys(f"Processing failed/wait timeout: {e}\n\nTraceback:\n{safe_traceback()}")
597
- st.session_state["last_error"] = err
598
- st.error("Video processing failed or timed out. See Last Error.")
599
- raise
600
 
601
- st.session_state["uploaded_file"] = uploaded
602
- st.session_state["processed_file"] = processed
603
- st.session_state["last_loaded_path"] = current_path
604
- st.session_state["file_hash"] = current_hash
605
 
606
- prompt_text = (analysis_prompt.strip() or DEFAULT_PROMPT).strip()
607
  out = ""
608
- model_used = model_id
609
- max_tokens = 2048 if "2.5" in model_used else 1024
 
 
 
 
 
610
  est_tokens = max_tokens
 
611
 
612
- # Try Agent first, fallback to Responses API
613
  agent = maybe_create_agent(model_used)
614
  debug_info = {"agent_attempted": False, "agent_ok": False, "agent_error": None, "agent_response_has_text": False}
615
  if agent:
@@ -618,12 +362,16 @@ if generate_now and not st.session_state.get("busy"):
618
  with st.spinner("Generating description via Agent..."):
619
  if not processed:
620
  raise RuntimeError("Processed file missing for agent generation")
 
621
  agent_response = agent.run(prompt_text, videos=[processed], safety_settings=safety_settings)
 
622
  agent_text = getattr(agent_response, "content", None) or getattr(agent_response, "outputText", None) or None
623
  if not agent_text:
 
624
  try:
625
  if isinstance(agent_response, dict):
626
- for k in ("content", "outputText", "text", "message"):
 
627
  if k in agent_response and agent_response[k]:
628
  agent_text = agent_response[k]
629
  break
@@ -634,21 +382,143 @@ if generate_now and not st.session_state.get("busy"):
634
  debug_info["agent_ok"] = True
635
  debug_info["agent_response_has_text"] = True
636
  else:
 
637
  debug_info["agent_ok"] = False
638
  except Exception as ae:
 
639
  debug_info["agent_error"] = f"{ae}"
 
640
  debug_info["agent_traceback"] = traceback.format_exc()
 
641
 
642
  if not out:
 
643
  try:
644
- with st.spinner("Generating description via Responses API..."):
645
- out = generate_via_responses_api(prompt_text, processed, model_used, max_tokens=max_tokens, timeout=st.session_state.get("generation_timeout", 300))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
646
  except Exception as e:
 
647
  tb = traceback.format_exc()
648
- st.session_state["last_error"] = scrub_api_keys(f"Responses API error: {e}\n\nDebug: {debug_info}\n\nTraceback:\n{safe_traceback()}")
649
  st.error("An error occurred while generating the story. You can try Generate again; the uploaded video will be reused.")
650
  out = ""
651
 
 
652
  if out:
653
  out = remove_prompt_echo(prompt_text, out)
654
  p = prompt_text
@@ -670,8 +540,7 @@ if generate_now and not st.session_state.get("busy"):
670
 
671
  except Exception as e:
672
  tb = traceback.format_exc()
673
- err = scrub_api_keys(f"{e}\n\nDebug: {locals().get('debug_info', {})}\n\nTraceback:\n{safe_traceback()}")
674
- st.session_state["last_error"] = err
675
  st.error("An error occurred while generating the story. You can try Generate again; the uploaded video will be reused.")
676
  finally:
677
  st.session_state["busy"] = False
 
7
  from glob import glob
8
  from pathlib import Path
9
  from difflib import SequenceMatcher
 
 
10
 
11
  import yt_dlp
12
+ import ffmpeg
13
  import streamlit as st
14
  from dotenv import load_dotenv
15
 
16
  load_dotenv()
17
 
 
18
  try:
19
  from phi.agent import Agent
20
  from phi.model.google import Gemini
21
  from phi.tools.duckduckgo import DuckDuckGo
 
22
  HAS_PHI = True
23
  except Exception:
24
  Agent = Gemini = DuckDuckGo = None
25
  HAS_PHI = False
26
 
 
27
  try:
28
  import google.generativeai as genai
29
+ from google.generativeai import upload_file, get_file # type: ignore
 
30
  HAS_GENAI = True
31
  except Exception:
32
  genai = None
33
  upload_file = get_file = None
34
  HAS_GENAI = False
35
 
 
 
36
  st.set_page_config(page_title="Generate the story of videos", layout="wide")
37
  DATA_DIR = Path("./data")
38
  DATA_DIR.mkdir(exist_ok=True)
39
 
40
+ # Session defaults
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  st.session_state.setdefault("videos", "")
42
  st.session_state.setdefault("loop_video", False)
43
  st.session_state.setdefault("uploaded_file", None)
 
47
  st.session_state.setdefault("analysis_out", "")
48
  st.session_state.setdefault("last_error", "")
49
  st.session_state.setdefault("file_hash", None)
50
+ st.session_state.setdefault("fast_mode", False)
51
  st.session_state.setdefault("api_key", os.getenv("GOOGLE_API_KEY", ""))
52
  st.session_state.setdefault("last_model", "")
53
  st.session_state.setdefault("upload_progress", {"uploaded": 0, "total": 0})
54
  st.session_state.setdefault("last_url_value", "")
 
 
 
55
 
 
56
  def sanitize_filename(path_str: str):
57
  name = Path(path_str).name
58
  return name.lower().translate(str.maketrans("", "", string.punctuation)).replace(" ", "_")
59
 
60
  def file_sha256(path: str, block_size: int = 65536) -> str:
61
+ h = hashlib.sha256()
62
+ with open(path, "rb") as f:
63
+ for chunk in iter(lambda: f.read(block_size), b""):
64
+ h.update(chunk)
65
+ return h.hexdigest()
 
 
 
66
 
67
  def convert_video_to_mp4(video_path: str) -> str:
68
  target_path = str(Path(video_path).with_suffix(".mp4"))
69
  if os.path.exists(target_path):
70
  return target_path
71
+ ffmpeg.input(video_path).output(target_path).run(overwrite_output=True, quiet=True)
72
  try:
73
+ os.remove(video_path)
74
+ except Exception:
75
+ pass
 
 
 
 
 
 
 
 
 
76
  return target_path
77
 
78
  def compress_video(input_path: str, target_path: str, crf: int = 28, preset: str = "fast"):
79
  try:
80
+ ffmpeg.input(input_path).output(target_path, vcodec="libx264", crf=crf, preset=preset).run(overwrite_output=True, quiet=True)
81
+ return target_path
 
 
 
 
 
82
  except Exception:
 
83
  return input_path
84
 
85
  def download_video_ytdlp(url: str, save_dir: str, video_password: str = None) -> str:
 
88
  outtmpl = str(Path(save_dir) / "%(id)s.%(ext)s")
89
  ydl_opts = {"outtmpl": outtmpl, "format": "best"}
90
  if video_password:
 
91
  ydl_opts["videopassword"] = video_password
92
  with yt_dlp.YoutubeDL(ydl_opts) as ydl:
93
  info = ydl.extract_info(url, download=True)
94
+ video_id = info.get("id") if isinstance(info, dict) else None
95
+ if video_id:
96
+ matches = glob(os.path.join(save_dir, f"{video_id}.*"))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97
  else:
98
+ all_files = glob(os.path.join(save_dir, "*"))
99
+ matches = sorted(all_files, key=os.path.getmtime, reverse=True)[:1] if all_files else []
100
+ if not matches:
101
+ raise FileNotFoundError("Downloaded video not found")
102
+ return convert_video_to_mp4(matches[0])
 
103
 
104
  def file_name_or_id(file_obj):
105
  if file_obj is None:
106
  return None
107
  if isinstance(file_obj, dict):
108
  return file_obj.get("name") or file_obj.get("id")
 
109
  return getattr(file_obj, "name", None) or getattr(file_obj, "id", None) or getattr(file_obj, "fileId", None)
110
 
111
  def get_effective_api_key():
 
118
  try:
119
  genai.configure(api_key=key)
120
  except Exception:
121
+ pass
122
  return True
123
 
 
124
  _agent = None
125
  def maybe_create_agent(model_id: str):
126
  global _agent
 
135
  _agent = Agent(name="Video AI summarizer", model=Gemini(id=model_id), tools=[DuckDuckGo()], markdown=True)
136
  st.session_state["last_model"] = model_id
137
  except Exception:
 
138
  _agent = None
139
  return _agent
140
 
 
150
  try:
151
  os.remove(f)
152
  except Exception:
153
+ pass
154
 
155
+ # track url changes
156
  current_url = st.session_state.get("url", "")
157
  if current_url != st.session_state.get("last_url_value"):
158
  clear_all_video_state()
159
  st.session_state["last_url_value"] = current_url
160
 
 
161
  st.sidebar.header("Video Input")
162
  st.sidebar.text_input("Video URL", key="url", placeholder="https://")
163
 
164
  settings_exp = st.sidebar.expander("Settings", expanded=False)
165
+ model_input = settings_exp.text_input("Gemini Model (short name)", "gemini-2.0-flash-lite", key="model_input")
 
 
 
 
 
 
 
 
166
  settings_exp.text_input("Google API Key", key="api_key", value=os.getenv("GOOGLE_API_KEY", ""), type="password")
167
+ default_prompt = (
168
+ "Watch the video and provide a detailed behavioral report focusing on human actions, interactions, posture, movement, and apparent intent. Keep language professional. Include a list of observations for notable events."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
169
  )
170
+ analysis_prompt = settings_exp.text_area("Enter analysis", value=default_prompt, height=140)
171
+ settings_exp.text_input("Video Password (if needed)", key="video-password", placeholder="password", type="password")
172
+ settings_exp.checkbox("Fast mode (skip compression, smaller model, fewer tokens)", key="fast_mode")
173
 
174
+ # Show which key is active
175
  key_source = "session" if st.session_state.get("api_key") else ".env" if os.getenv("GOOGLE_API_KEY") else "none"
176
  settings_exp.caption(f"Using API key from: **{key_source}**")
177
+
178
  if not get_effective_api_key():
179
  settings_exp.warning("No Google API key provided; upload/generation disabled.", icon="⚠️")
180
 
 
185
  {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "OFF"},
186
  ]
187
 
 
188
  def upload_video_sdk(filepath: str):
189
  key = get_effective_api_key()
190
  if not key:
 
192
  if not HAS_GENAI or upload_file is None:
193
  raise RuntimeError("google.generativeai SDK not available; cannot upload")
194
  genai.configure(api_key=key)
 
195
  return upload_file(filepath)
196
 
197
+ def wait_for_processed(file_obj, timeout=180):
 
 
198
  if not HAS_GENAI or get_file is None:
199
  return file_obj
200
  start = time.time()
 
203
  return file_obj
204
  backoff = 1.0
205
  while True:
206
+ obj = get_file(name)
 
 
 
 
 
 
 
 
207
  state = getattr(obj, "state", None)
208
  if not state or getattr(state, "name", None) != "PROCESSING":
209
  return obj
 
210
  if time.time() - start > timeout:
211
+ raise TimeoutError("File processing timed out")
212
  time.sleep(backoff)
213
  backoff = min(backoff * 2, 8.0)
214
 
 
231
  return b_full[len(ph):].lstrip(" \n:-")
232
  return text
233
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
234
  col1, col2 = st.columns([1, 3])
235
  with col1:
236
  generate_now = st.button("Generate the story", type="primary", disabled=not bool(get_effective_api_key()))
237
  with col2:
238
+ pass
239
 
240
  if st.sidebar.button("Load Video", use_container_width=True):
241
  try:
 
250
  except Exception:
251
  st.session_state["file_hash"] = None
252
  except Exception as e:
 
253
  st.sidebar.error(f"Failed to load video: {e}")
254
 
255
  if st.session_state["videos"]:
 
275
# Best-effort: display the loaded file's size and warn about large uploads.
try:
    size_mb = os.path.getsize(st.session_state["videos"]) / (1024 * 1024)
    st.sidebar.caption(f"File size: {size_mb:.1f} MB")
    large_and_slow = size_mb > 50 and not st.session_state.get("fast_mode", False)
    if large_and_slow:
        st.sidebar.warning("Large file detected — consider enabling Fast mode or compression.", icon="⚠️")
except Exception:
    # Size display is cosmetic; never let it break the sidebar.
    pass
282
 
283
+ # --- Generation flow ---
284
  if generate_now and not st.session_state.get("busy"):
285
  if not st.session_state.get("videos"):
286
  st.error("No video loaded. Use 'Load Video' in the sidebar.")
 
295
  if HAS_GENAI and genai is not None:
296
  genai.configure(api_key=key_to_use)
297
  except Exception:
298
+ pass
299
 
300
+ model_id = (st.session_state.get("model_input") or "gemini-2.0-flash-lite").strip()
301
  if st.session_state.get("last_model") != model_id:
302
  st.session_state["last_model"] = ""
303
  maybe_create_agent(model_id)
 
309
  except Exception:
310
  current_hash = None
311
 
 
312
  reupload_needed = True
313
+ if processed and st.session_state.get("last_loaded_path") == current_path and st.session_state.get("file_hash") == current_hash:
 
 
314
  reupload_needed = False
315
 
316
if reupload_needed:
    # (Re)upload path: optionally compress large sources, then push the file
    # to the Files API and wait until Google finishes processing it.
    if not HAS_GENAI:
        raise RuntimeError("google.generativeai SDK not available; install it.")
    local_path = current_path
    fast_mode = st.session_state.get("fast_mode", False)
    upload_path = local_path
    try:
        file_size_mb = os.path.getsize(local_path) / (1024 * 1024)
    except Exception:
        # Unknown size: treat as small and skip compression.
        file_size_mb = 0

    if not fast_mode and file_size_mb > 50:
        # Large file and the user did not opt into fast mode: try to shrink it.
        compressed_path = str(Path(local_path).with_name(Path(local_path).stem + "_compressed.mp4"))
        try:
            # NOTE(review): this branch only runs when fast_mode is False, so
            # the original `"veryfast" if fast_mode else "fast"` always chose
            # "fast"; the dead conditional is simplified here.
            upload_path = compress_video(local_path, compressed_path, crf=28, preset="fast")
        except Exception:
            # Compression is best-effort; fall back to the raw file.
            upload_path = local_path

    with st.spinner("Uploading video..."):
        uploaded = upload_video_sdk(upload_path)
        processed = wait_for_processed(uploaded, timeout=180)
    # Cache upload metadata so re-runs on the same file skip re-uploading.
    st.session_state["uploaded_file"] = uploaded
    st.session_state["processed_file"] = processed
    st.session_state["last_loaded_path"] = current_path
    st.session_state["file_hash"] = current_hash
 
 
 
 
 
 
 
 
 
342
 
343
+ prompt_text = (analysis_prompt.strip() or default_prompt).strip()
 
 
 
344
 
 
345
  out = ""
346
+ if st.session_state.get("fast_mode"):
347
+ model_used = model_id if model_id else "gemini-2.0-flash-lite"
348
+ max_tokens = 512
349
+ else:
350
+ model_used = model_id
351
+ max_tokens = 1024
352
+
353
  est_tokens = max_tokens
354
+ est_cost_caption = f"Est. max tokens: {est_tokens}"
355
 
356
+ # First try Agent, but guard and FALLBACK to direct genai responses if Agent fails or returns empty.
357
  agent = maybe_create_agent(model_used)
358
  debug_info = {"agent_attempted": False, "agent_ok": False, "agent_error": None, "agent_response_has_text": False}
359
  if agent:
 
362
  with st.spinner("Generating description via Agent..."):
363
  if not processed:
364
  raise RuntimeError("Processed file missing for agent generation")
365
+ # call agent.run inside try/except to catch library IndexError
366
  agent_response = agent.run(prompt_text, videos=[processed], safety_settings=safety_settings)
367
+ # Try to extract text from common attributes; be defensive
368
  agent_text = getattr(agent_response, "content", None) or getattr(agent_response, "outputText", None) or None
369
  if not agent_text:
370
+ # try dict-like access
371
  try:
372
  if isinstance(agent_response, dict):
373
+ # check common keys
374
+ for k in ("content", "outputText", "text"):
375
  if k in agent_response and agent_response[k]:
376
  agent_text = agent_response[k]
377
  break
 
382
  debug_info["agent_ok"] = True
383
  debug_info["agent_response_has_text"] = True
384
  else:
385
+ # Agent returned but had no usable text; set a marker to fallback
386
  debug_info["agent_ok"] = False
387
  except Exception as ae:
388
+ # Save agent error and continue to fallback path instead of crashing
389
  debug_info["agent_error"] = f"{ae}"
390
+ # include traceback for debugging
391
  debug_info["agent_traceback"] = traceback.format_exc()
392
+ # Do not re-raise; we'll fallback to genai.responses.generate below
393
 
394
if not out:
    # Fallback: call the google.generativeai SDK directly when the Agent path
    # produced no text (or no agent is configured).
    try:
        if not HAS_GENAI or genai is None:
            raise RuntimeError("Responses API not available; install google.generativeai SDK.")
        genai.configure(api_key=key_to_use)
        fname = file_name_or_id(processed)
        if not fname:
            raise RuntimeError("Uploaded file missing name/id")

        # NOTE(review): the SDK exposes GenerativeModel.generate_content, not
        # `genai.responses.generate`; the original call raised AttributeError
        # every time, was swallowed by the except below, and left `out` empty,
        # so this fallback silently never produced output. Fixed to use the
        # real API, passing the processed file object alongside the prompt.
        model = genai.GenerativeModel(model_used)
        response = model.generate_content(
            [processed, prompt_text],
            safety_settings=safety_settings,
            generation_config={"max_output_tokens": max_tokens},
        )

        # Preferred extraction: the SDK's aggregated `.text` accessor.
        text_pieces = []
        try:
            if getattr(response, "text", None):
                text_pieces.append(response.text.strip())
        except Exception:
            # `.text` raises when the response was blocked or empty;
            # fall through to candidate walking.
            pass

        # Defensive fallback: walk candidates -> content -> parts for text.
        if not text_pieces:
            for cand in getattr(response, "candidates", None) or []:
                content = getattr(cand, "content", None)
                for part in getattr(content, "parts", None) or []:
                    t = getattr(part, "text", None)
                    if t and str(t).strip():
                        text_pieces.append(str(t).strip())

        # Dedupe while preserving order, then join into the final output.
        seen = set()
        filtered = []
        for t in text_pieces:
            if isinstance(t, str) and t and t not in seen:
                filtered.append(t)
                seen.add(t)
        out = "\n\n".join(filtered)
    except Exception as e:
        # Surface a clear error to the UI, keeping debug context for support.
        tb = traceback.format_exc()
        st.session_state["last_error"] = f"Responses API error: {e}\n\nDebug: {debug_info}\n\nTraceback:\n{tb}"
        st.error("An error occurred while generating the story. You can try Generate again; the uploaded video will be reused.")
        out = ""
520
 
521
+ # post-process output
522
  if out:
523
  out = remove_prompt_echo(prompt_text, out)
524
  p = prompt_text
 
540
 
541
  except Exception as e:
542
  tb = traceback.format_exc()
543
+ st.session_state["last_error"] = f"{e}\n\nDebug: {locals().get('debug_info', debug_info)}\n\nTraceback:\n{tb}"
 
544
  st.error("An error occurred while generating the story. You can try Generate again; the uploaded video will be reused.")
545
  finally:
546
  st.session_state["busy"] = False