CB committed on
Commit
1ed3b89
·
verified ·
1 Parent(s): d9fde4e

Update streamlit_app.py

Browse files
Files changed (1) hide show
  1. streamlit_app.py +334 -203
streamlit_app.py CHANGED
@@ -7,6 +7,8 @@ import traceback
7
  from glob import glob
8
  from pathlib import Path
9
  from difflib import SequenceMatcher
 
 
10
 
11
  import yt_dlp
12
  import ffmpeg
@@ -15,29 +17,50 @@ from dotenv import load_dotenv
15
 
16
  load_dotenv()
17
 
 
18
  try:
19
  from phi.agent import Agent
20
  from phi.model.google import Gemini
21
  from phi.tools.duckduckgo import DuckDuckGo
 
22
  HAS_PHI = True
23
  except Exception:
24
  Agent = Gemini = DuckDuckGo = None
25
  HAS_PHI = False
26
 
 
27
  try:
28
  import google.generativeai as genai
29
- from google.generativeai import upload_file, get_file # type: ignore
 
30
  HAS_GENAI = True
31
  except Exception:
32
  genai = None
33
  upload_file = get_file = None
34
  HAS_GENAI = False
35
 
 
 
36
  st.set_page_config(page_title="Generate the story of videos", layout="wide")
37
  DATA_DIR = Path("./data")
38
  DATA_DIR.mkdir(exist_ok=True)
39
 
40
- # Session defaults
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  st.session_state.setdefault("videos", "")
42
  st.session_state.setdefault("loop_video", False)
43
  st.session_state.setdefault("uploaded_file", None)
@@ -47,39 +70,59 @@ st.session_state.setdefault("last_loaded_path", "")
47
  st.session_state.setdefault("analysis_out", "")
48
  st.session_state.setdefault("last_error", "")
49
  st.session_state.setdefault("file_hash", None)
50
- st.session_state.setdefault("fast_mode", False)
51
  st.session_state.setdefault("api_key", os.getenv("GOOGLE_API_KEY", ""))
52
  st.session_state.setdefault("last_model", "")
53
  st.session_state.setdefault("upload_progress", {"uploaded": 0, "total": 0})
54
  st.session_state.setdefault("last_url_value", "")
 
 
 
55
 
 
56
  def sanitize_filename(path_str: str):
57
  name = Path(path_str).name
58
  return name.lower().translate(str.maketrans("", "", string.punctuation)).replace(" ", "_")
59
 
60
  def file_sha256(path: str, block_size: int = 65536) -> str:
61
- h = hashlib.sha256()
62
- with open(path, "rb") as f:
63
- for chunk in iter(lambda: f.read(block_size), b""):
64
- h.update(chunk)
65
- return h.hexdigest()
 
 
 
66
 
67
  def convert_video_to_mp4(video_path: str) -> str:
68
  target_path = str(Path(video_path).with_suffix(".mp4"))
69
  if os.path.exists(target_path):
70
  return target_path
71
- ffmpeg.input(video_path).output(target_path).run(overwrite_output=True, quiet=True)
72
  try:
73
- os.remove(video_path)
74
- except Exception:
75
- pass
 
 
 
 
 
 
 
 
 
76
  return target_path
77
 
78
  def compress_video(input_path: str, target_path: str, crf: int = 28, preset: str = "fast"):
79
  try:
80
- ffmpeg.input(input_path).output(target_path, vcodec="libx264", crf=crf, preset=preset).run(overwrite_output=True, quiet=True)
81
- return target_path
 
 
 
 
 
82
  except Exception:
 
83
  return input_path
84
 
85
  def download_video_ytdlp(url: str, save_dir: str, video_password: str = None) -> str:
@@ -88,24 +131,46 @@ def download_video_ytdlp(url: str, save_dir: str, video_password: str = None) ->
88
  outtmpl = str(Path(save_dir) / "%(id)s.%(ext)s")
89
  ydl_opts = {"outtmpl": outtmpl, "format": "best"}
90
  if video_password:
 
91
  ydl_opts["videopassword"] = video_password
92
  with yt_dlp.YoutubeDL(ydl_opts) as ydl:
93
  info = ydl.extract_info(url, download=True)
94
- video_id = info.get("id") if isinstance(info, dict) else None
95
- if video_id:
96
- matches = glob(os.path.join(save_dir, f"{video_id}.*"))
97
- else:
 
 
 
 
 
 
 
 
 
 
 
 
 
98
  all_files = glob(os.path.join(save_dir, "*"))
99
- matches = sorted(all_files, key=os.path.getmtime, reverse=True)[:1] if all_files else []
100
- if not matches:
101
- raise FileNotFoundError("Downloaded video not found")
102
- return convert_video_to_mp4(matches[0])
 
 
 
 
 
 
 
103
 
104
  def file_name_or_id(file_obj):
105
  if file_obj is None:
106
  return None
107
  if isinstance(file_obj, dict):
108
  return file_obj.get("name") or file_obj.get("id")
 
109
  return getattr(file_obj, "name", None) or getattr(file_obj, "id", None) or getattr(file_obj, "fileId", None)
110
 
111
  def get_effective_api_key():
@@ -118,9 +183,10 @@ def configure_genai_if_needed():
118
  try:
119
  genai.configure(api_key=key)
120
  except Exception:
121
- pass
122
  return True
123
 
 
124
  _agent = None
125
  def maybe_create_agent(model_id: str):
126
  global _agent
@@ -135,6 +201,7 @@ def maybe_create_agent(model_id: str):
135
  _agent = Agent(name="Video AI summarizer", model=Gemini(id=model_id), tools=[DuckDuckGo()], markdown=True)
136
  st.session_state["last_model"] = model_id
137
  except Exception:
 
138
  _agent = None
139
  return _agent
140
 
@@ -150,31 +217,50 @@ def clear_all_video_state():
150
  try:
151
  os.remove(f)
152
  except Exception:
153
- pass
154
 
155
- # track url changes
156
  current_url = st.session_state.get("url", "")
157
  if current_url != st.session_state.get("last_url_value"):
158
  clear_all_video_state()
159
  st.session_state["last_url_value"] = current_url
160
 
 
161
  st.sidebar.header("Video Input")
162
  st.sidebar.text_input("Video URL", key="url", placeholder="https://")
163
 
164
  settings_exp = st.sidebar.expander("Settings", expanded=False)
165
- model_input = settings_exp.text_input("Gemini Model (short name)", "gemini-2.0-flash-lite", key="model_input")
 
 
 
 
 
 
 
 
166
  settings_exp.text_input("Google API Key", key="api_key", value=os.getenv("GOOGLE_API_KEY", ""), type="password")
167
- default_prompt = (
168
- "Watch the video and provide a detailed behavioral report focusing on human actions, interactions, posture, movement, and apparent intent. Keep language professional. Include a list of observations for notable events."
169
- )
170
- analysis_prompt = settings_exp.text_area("Enter analysis", value=default_prompt, height=140)
171
  settings_exp.text_input("Video Password (if needed)", key="video-password", placeholder="password", type="password")
172
- settings_exp.checkbox("Fast mode (skip compression, smaller model, fewer tokens)", key="fast_mode")
173
 
174
- # Show which key is active
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
175
  key_source = "session" if st.session_state.get("api_key") else ".env" if os.getenv("GOOGLE_API_KEY") else "none"
176
  settings_exp.caption(f"Using API key from: **{key_source}**")
177
-
178
  if not get_effective_api_key():
179
  settings_exp.warning("No Google API key provided; upload/generation disabled.", icon="⚠️")
180
 
@@ -185,6 +271,7 @@ safety_settings = [
185
  {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "OFF"},
186
  ]
187
 
 
188
  def upload_video_sdk(filepath: str):
189
  key = get_effective_api_key()
190
  if not key:
@@ -192,9 +279,12 @@ def upload_video_sdk(filepath: str):
192
  if not HAS_GENAI or upload_file is None:
193
  raise RuntimeError("google.generativeai SDK not available; cannot upload")
194
  genai.configure(api_key=key)
 
195
  return upload_file(filepath)
196
 
197
- def wait_for_processed(file_obj, timeout=180):
 
 
198
  if not HAS_GENAI or get_file is None:
199
  return file_obj
200
  start = time.time()
@@ -203,12 +293,21 @@ def wait_for_processed(file_obj, timeout=180):
203
  return file_obj
204
  backoff = 1.0
205
  while True:
206
- obj = get_file(name)
 
 
 
 
 
 
 
 
207
  state = getattr(obj, "state", None)
208
  if not state or getattr(state, "name", None) != "PROCESSING":
209
  return obj
 
210
  if time.time() - start > timeout:
211
- raise TimeoutError("File processing timed out")
212
  time.sleep(backoff)
213
  backoff = min(backoff * 2, 8.0)
214
 
@@ -231,11 +330,161 @@ def remove_prompt_echo(prompt: str, text: str, check_len: int = 600, ratio_thres
231
  return b_full[len(ph):].lstrip(" \n:-")
232
  return text
233
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
234
  col1, col2 = st.columns([1, 3])
235
  with col1:
236
  generate_now = st.button("Generate the story", type="primary", disabled=not bool(get_effective_api_key()))
237
  with col2:
238
- pass
239
 
240
  if st.sidebar.button("Load Video", use_container_width=True):
241
  try:
@@ -250,6 +499,7 @@ if st.sidebar.button("Load Video", use_container_width=True):
250
  except Exception:
251
  st.session_state["file_hash"] = None
252
  except Exception as e:
 
253
  st.sidebar.error(f"Failed to load video: {e}")
254
 
255
  if st.session_state["videos"]:
@@ -275,12 +525,12 @@ if st.session_state["videos"]:
275
  try:
276
  file_size_mb = os.path.getsize(st.session_state["videos"]) / (1024 * 1024)
277
  st.sidebar.caption(f"File size: {file_size_mb:.1f} MB")
278
- if file_size_mb > 50 and not st.session_state.get("fast_mode", False):
279
- st.sidebar.warning("Large file detected — consider enabling Fast mode or compression.", icon="⚠️")
280
  except Exception:
281
  pass
282
 
283
- # --- Generation flow ---
284
  if generate_now and not st.session_state.get("busy"):
285
  if not st.session_state.get("videos"):
286
  st.error("No video loaded. Use 'Load Video' in the sidebar.")
@@ -295,9 +545,9 @@ if generate_now and not st.session_state.get("busy"):
295
  if HAS_GENAI and genai is not None:
296
  genai.configure(api_key=key_to_use)
297
  except Exception:
298
- pass
299
 
300
- model_id = (st.session_state.get("model_input") or "gemini-2.0-flash-lite").strip()
301
  if st.session_state.get("last_model") != model_id:
302
  st.session_state["last_model"] = ""
303
  maybe_create_agent(model_id)
@@ -309,51 +559,57 @@ if generate_now and not st.session_state.get("busy"):
309
  except Exception:
310
  current_hash = None
311
 
 
312
  reupload_needed = True
313
- if processed and st.session_state.get("last_loaded_path") == current_path and st.session_state.get("file_hash") == current_hash:
 
 
314
  reupload_needed = False
315
 
316
  if reupload_needed:
317
  if not HAS_GENAI:
318
  raise RuntimeError("google.generativeai SDK not available; install it.")
319
  local_path = current_path
320
- fast_mode = st.session_state.get("fast_mode", False)
321
- upload_path = local_path
322
- try:
323
- file_size_mb = os.path.getsize(local_path) / (1024 * 1024)
324
- except Exception:
325
- file_size_mb = 0
326
 
327
- if not fast_mode and file_size_mb > 50:
328
- compressed_path = str(Path(local_path).with_name(Path(local_path).stem + "_compressed.mp4"))
329
  try:
330
- preset = "veryfast" if fast_mode else "fast"
331
- upload_path = compress_video(local_path, compressed_path, crf=28, preset=preset)
332
- except Exception:
333
- upload_path = local_path
 
 
334
 
335
- with st.spinner("Uploading video..."):
336
- uploaded = upload_video_sdk(upload_path)
337
- processed = wait_for_processed(uploaded, timeout=180)
338
- st.session_state["uploaded_file"] = uploaded
339
- st.session_state["processed_file"] = processed
340
- st.session_state["last_loaded_path"] = current_path
341
- st.session_state["file_hash"] = current_hash
 
 
 
 
 
 
 
 
 
342
 
343
- prompt_text = (analysis_prompt.strip() or default_prompt).strip()
 
 
 
344
 
 
345
  out = ""
346
- if st.session_state.get("fast_mode"):
347
- model_used = model_id if model_id else "gemini-2.0-flash-lite"
348
- max_tokens = 512
349
- else:
350
- model_used = model_id
351
- max_tokens = 1024
352
-
353
  est_tokens = max_tokens
354
- est_cost_caption = f"Est. max tokens: {est_tokens}"
355
 
356
- # First try Agent, but guard and FALLBACK to direct genai responses if Agent fails or returns empty.
357
  agent = maybe_create_agent(model_used)
358
  debug_info = {"agent_attempted": False, "agent_ok": False, "agent_error": None, "agent_response_has_text": False}
359
  if agent:
@@ -362,16 +618,12 @@ if generate_now and not st.session_state.get("busy"):
362
  with st.spinner("Generating description via Agent..."):
363
  if not processed:
364
  raise RuntimeError("Processed file missing for agent generation")
365
- # call agent.run inside try/except to catch library IndexError
366
  agent_response = agent.run(prompt_text, videos=[processed], safety_settings=safety_settings)
367
- # Try to extract text from common attributes; be defensive
368
  agent_text = getattr(agent_response, "content", None) or getattr(agent_response, "outputText", None) or None
369
  if not agent_text:
370
- # try dict-like access
371
  try:
372
  if isinstance(agent_response, dict):
373
- # check common keys
374
- for k in ("content", "outputText", "text"):
375
  if k in agent_response and agent_response[k]:
376
  agent_text = agent_response[k]
377
  break
@@ -382,143 +634,21 @@ if generate_now and not st.session_state.get("busy"):
382
  debug_info["agent_ok"] = True
383
  debug_info["agent_response_has_text"] = True
384
  else:
385
- # Agent returned but had no usable text; set a marker to fallback
386
  debug_info["agent_ok"] = False
387
  except Exception as ae:
388
- # Save agent error and continue to fallback path instead of crashing
389
  debug_info["agent_error"] = f"{ae}"
390
- # include traceback for debugging
391
  debug_info["agent_traceback"] = traceback.format_exc()
392
- # Do not re-raise; we'll fallback to genai.responses.generate below
393
 
394
  if not out:
395
- # Fallback to direct Responses API flow
396
  try:
397
- if not HAS_GENAI or genai is None:
398
- raise RuntimeError("Responses API not available; install google.generativeai SDK.")
399
- genai.configure(api_key=key_to_use)
400
- fname = file_name_or_id(processed)
401
- if not fname:
402
- raise RuntimeError("Uploaded file missing name/id")
403
- system_msg = {"role": "system", "content": prompt_text}
404
- user_msg = {"role": "user", "content": "Please summarize the attached video."}
405
-
406
- try:
407
- response = genai.responses.generate(
408
- model=model_used,
409
- messages=[system_msg, user_msg],
410
- files=[{"name": fname}],
411
- safety_settings=safety_settings,
412
- max_output_tokens=max_tokens,
413
- )
414
- except TypeError:
415
- response = genai.responses.generate(
416
- model=model_used,
417
- input=[{"text": prompt_text, "files": [{"name": fname}]}],
418
- safety_settings=safety_settings,
419
- max_output_tokens=max_tokens,
420
- )
421
-
422
- # Defensive normalization of response -> outputs list
423
- outputs = []
424
- if response is None:
425
- outputs = []
426
- elif isinstance(response, dict):
427
- for key in ("output", "candidates", "items", "responses"):
428
- val = response.get(key)
429
- if isinstance(val, list) and val:
430
- outputs = val
431
- break
432
- if not outputs:
433
- for v in response.values():
434
- if isinstance(v, list) and v:
435
- outputs = v
436
- break
437
- else:
438
- for attr in ("output", "candidates", "items", "responses"):
439
- val = getattr(response, attr, None)
440
- if isinstance(val, list) and val:
441
- outputs = val
442
- break
443
-
444
- # ensure list
445
- if not isinstance(outputs, list):
446
- outputs = list(outputs) if outputs else []
447
-
448
- # extract text pieces safely
449
- text_pieces = []
450
- for item in outputs:
451
- if item is None:
452
- continue
453
- # item may be dict or object; attempt to find text-rich fields
454
- cand_contents = None
455
- if isinstance(item, dict):
456
- for k in ("content", "text", "message", "output_text", "output"):
457
- if k in item and item[k]:
458
- cand_contents = item[k]
459
- break
460
- else:
461
- for k in ("content", "text", "message", "output", "output_text"):
462
- cand_contents = getattr(item, k, None)
463
- if cand_contents:
464
- break
465
-
466
- if isinstance(cand_contents, str):
467
- if cand_contents.strip():
468
- text_pieces.append(cand_contents.strip())
469
- continue
470
-
471
- if isinstance(cand_contents, (list, tuple)):
472
- for c in cand_contents:
473
- if c is None:
474
- continue
475
- if isinstance(c, str):
476
- if c.strip():
477
- text_pieces.append(c.strip())
478
- continue
479
- if isinstance(c, dict):
480
- t = c.get("text") or c.get("content")
481
- else:
482
- t = getattr(c, "text", None) or getattr(c, "content", None)
483
- if t:
484
- text_pieces.append(str(t).strip())
485
- continue
486
-
487
- direct = None
488
- if isinstance(item, dict):
489
- direct = item.get("text") or item.get("output_text") or item.get("message")
490
- else:
491
- direct = getattr(item, "text", None) or getattr(item, "output_text", None) or getattr(item, "message", None)
492
- if direct:
493
- text_pieces.append(str(direct).strip())
494
-
495
- if not text_pieces:
496
- top_text = None
497
- if isinstance(response, dict):
498
- top_text = response.get("text") or response.get("message")
499
- else:
500
- top_text = getattr(response, "text", None) or getattr(response, "message", None)
501
- if top_text:
502
- text_pieces.append(str(top_text).strip())
503
-
504
- # dedupe preserving order
505
- seen = set()
506
- filtered = []
507
- for t in text_pieces:
508
- if not isinstance(t, str):
509
- continue
510
- if t and t not in seen:
511
- filtered.append(t)
512
- seen.add(t)
513
- out = "\n\n".join(filtered)
514
  except Exception as e:
515
- # Capture clear error to UI and include debug_info
516
  tb = traceback.format_exc()
517
- st.session_state["last_error"] = f"Responses API error: {e}\n\nDebug: {debug_info}\n\nTraceback:\n{tb}"
518
  st.error("An error occurred while generating the story. You can try Generate again; the uploaded video will be reused.")
519
  out = ""
520
 
521
- # post-process output
522
  if out:
523
  out = remove_prompt_echo(prompt_text, out)
524
  p = prompt_text
@@ -540,7 +670,8 @@ if generate_now and not st.session_state.get("busy"):
540
 
541
  except Exception as e:
542
  tb = traceback.format_exc()
543
- st.session_state["last_error"] = f"{e}\n\nDebug: {locals().get('debug_info', debug_info)}\n\nTraceback:\n{tb}"
 
544
  st.error("An error occurred while generating the story. You can try Generate again; the uploaded video will be reused.")
545
  finally:
546
  st.session_state["busy"] = False
 
7
  from glob import glob
8
  from pathlib import Path
9
  from difflib import SequenceMatcher
10
+ import json
11
+ import logging
12
 
13
  import yt_dlp
14
  import ffmpeg
 
17
 
18
  load_dotenv()
19
 
20
+ # Optional PHI integration
21
  try:
22
  from phi.agent import Agent
23
  from phi.model.google import Gemini
24
  from phi.tools.duckduckgo import DuckDuckGo
25
+
26
  HAS_PHI = True
27
  except Exception:
28
  Agent = Gemini = DuckDuckGo = None
29
  HAS_PHI = False
30
 
31
+ # google.generativeai SDK
32
  try:
33
  import google.generativeai as genai
34
+ from google.generativeai import upload_file, get_file
35
+
36
  HAS_GENAI = True
37
  except Exception:
38
  genai = None
39
  upload_file = get_file = None
40
  HAS_GENAI = False
41
 
42
+ logging.basicConfig(level=logging.INFO)
43
+
44
  st.set_page_config(page_title="Generate the story of videos", layout="wide")
45
  DATA_DIR = Path("./data")
46
  DATA_DIR.mkdir(exist_ok=True)
47
 
48
+ # ---- Defaults & constants ----
49
+ MODEL_OPTIONS = [
50
+ "gemini-2.5-flash",
51
+ "gemini-2.5-flash-lite",
52
+ "gemini-2.0-flash",
53
+ "gemini-2.0-flash-lite",
54
+ "custom",
55
+ ]
56
+ DEFAULT_MODEL = "gemini-2.0-flash-lite"
57
+ DEFAULT_PROMPT = (
58
+ "Watch the video and provide a detailed behavioral report focusing on human actions, interactions, posture, movement, and apparent intent. "
59
+ "Keep language professional. Include a list of observations for notable events."
60
+ )
61
+
62
+ # ---- Session defaults ----
63
+ st.session_state.setdefault("url", "")
64
  st.session_state.setdefault("videos", "")
65
  st.session_state.setdefault("loop_video", False)
66
  st.session_state.setdefault("uploaded_file", None)
 
70
  st.session_state.setdefault("analysis_out", "")
71
  st.session_state.setdefault("last_error", "")
72
  st.session_state.setdefault("file_hash", None)
 
73
  st.session_state.setdefault("api_key", os.getenv("GOOGLE_API_KEY", ""))
74
  st.session_state.setdefault("last_model", "")
75
  st.session_state.setdefault("upload_progress", {"uploaded": 0, "total": 0})
76
  st.session_state.setdefault("last_url_value", "")
77
+ st.session_state.setdefault("processing_timeout", 900)
78
+ st.session_state.setdefault("generation_timeout", 300)
79
+ st.session_state.setdefault("compress_threshold_mb", 200)
80
 
81
+ # ---- Helpers ----
82
def sanitize_filename(path_str: str):
    """Return a filesystem-friendly version of the file name in *path_str*.

    Lowercases the name, strips all ASCII punctuation (including the
    extension dot), and replaces spaces with underscores.
    """
    base_name = Path(path_str).name.lower()
    punctuation_free = base_name.translate(str.maketrans("", "", string.punctuation))
    return punctuation_free.replace(" ", "_")
85
 
86
  def file_sha256(path: str, block_size: int = 65536) -> str:
87
+ try:
88
+ h = hashlib.sha256()
89
+ with open(path, "rb") as f:
90
+ for chunk in iter(lambda: f.read(block_size), b""):
91
+ h.update(chunk)
92
+ return h.hexdigest()
93
+ except Exception:
94
+ return None
95
 
96
def convert_video_to_mp4(video_path: str) -> str:
    """Convert *video_path* to an .mp4 next to it and return the new path.

    If the .mp4 already exists it is reused as-is. On a successful
    conversion the source file is deleted (unless it *is* the target);
    a failed ffmpeg run is logged and re-raised without touching the
    source.
    """
    source = Path(video_path)
    target = source.with_suffix(".mp4")
    target_path = str(target)

    if os.path.exists(target_path):
        # Already converted on an earlier run — reuse it.
        return target_path

    try:
        ffmpeg.input(video_path).output(target_path).run(overwrite_output=True, quiet=True)
    except Exception:
        logging.exception("ffmpeg conversion failed")
        # Keep the original file intact; let the caller decide what to do.
        raise

    converted_ok = os.path.exists(target_path) and os.path.getsize(target_path) > 0
    if converted_ok:
        try:
            same_file = str(source.resolve()) == str(target.resolve())
            if not same_file:
                os.remove(video_path)
        except Exception:
            logging.exception("Failed to remove original video after conversion")
    return target_path
114
 
115
def compress_video(input_path: str, target_path: str, crf: int = 28, preset: str = "fast"):
    """Re-encode *input_path* as H.264 into *target_path*.

    Returns *target_path* on success; otherwise falls back to
    *input_path* — compression is best-effort and must never lose the
    video.
    """
    try:
        ffmpeg.input(input_path).output(target_path, vcodec="libx264", crf=crf, preset=preset).run(
            overwrite_output=True, quiet=True
        )
        produced = os.path.exists(target_path) and os.path.getsize(target_path) > 0
        if produced:
            return target_path
        logging.warning("Compression completed but target missing or empty; returning input path")
        return input_path
    except Exception:
        logging.exception("Video compression failed")
        return input_path
127
 
128
  def download_video_ytdlp(url: str, save_dir: str, video_password: str = None) -> str:
 
131
  outtmpl = str(Path(save_dir) / "%(id)s.%(ext)s")
132
  ydl_opts = {"outtmpl": outtmpl, "format": "best"}
133
  if video_password:
134
+ # yt-dlp accepts 'videopassword' in options for password-protected videos
135
  ydl_opts["videopassword"] = video_password
136
  with yt_dlp.YoutubeDL(ydl_opts) as ydl:
137
  info = ydl.extract_info(url, download=True)
138
+ # info may be a dict for single video or playlist; prefer single entry if present
139
+ video_candidates = []
140
+ if isinstance(info, dict):
141
+ # playlist -> entries list
142
+ entries = info.get("entries")
143
+ if entries:
144
+ # get last-downloaded entry (entries may be nested); map to filesystem files by ids
145
+ for e in entries:
146
+ if isinstance(e, dict) and e.get("id"):
147
+ video_candidates.append(str(Path(save_dir) / f"{e['id']}.mp4"))
148
+ else:
149
+ vid = info.get("id")
150
+ ext = info.get("ext") or "mp4"
151
+ if vid:
152
+ video_candidates.append(str(Path(save_dir) / f"{vid}.{ext}"))
153
+ # fallback: pick most recent file in dir
154
+ if not video_candidates:
155
  all_files = glob(os.path.join(save_dir, "*"))
156
+ if not all_files:
157
+ raise FileNotFoundError("Downloaded video not found")
158
+ matches = sorted(all_files, key=os.path.getmtime, reverse=True)
159
+ chosen = matches[0]
160
+ else:
161
+ # prefer existing files among candidates; pick first that exists, else fall back to newest
162
+ existing = [p for p in video_candidates if os.path.exists(p)]
163
+ chosen = existing[0] if existing else (sorted(glob(os.path.join(save_dir, "*")), key=os.path.getmtime, reverse=True)[0])
164
+ # Ensure mp4 target
165
+ final = convert_video_to_mp4(chosen)
166
+ return final
167
 
168
def file_name_or_id(file_obj):
    """Best-effort extraction of a file handle's identifier.

    Accepts None, a dict-like payload, or an SDK wrapper object, and
    checks the same fields in the same order in both cases: 'name',
    'id', 'fileId'. Returns None when nothing usable is found.
    """
    if file_obj is None:
        return None
    if isinstance(file_obj, dict):
        # Mirror the attribute path below: 'fileId' is a valid key too.
        return file_obj.get("name") or file_obj.get("id") or file_obj.get("fileId")
    # Common SDK wrappers expose 'name', 'id', or 'fileId' as attributes.
    return getattr(file_obj, "name", None) or getattr(file_obj, "id", None) or getattr(file_obj, "fileId", None)
175
 
176
  def get_effective_api_key():
 
183
  try:
184
  genai.configure(api_key=key)
185
  except Exception:
186
+ logging.exception("genai.configure failed")
187
  return True
188
 
189
+ # ---- Agent management (reuse) ----
190
  _agent = None
191
  def maybe_create_agent(model_id: str):
192
  global _agent
 
201
  _agent = Agent(name="Video AI summarizer", model=Gemini(id=model_id), tools=[DuckDuckGo()], markdown=True)
202
  st.session_state["last_model"] = model_id
203
  except Exception:
204
+ logging.exception("Failed to create PHI Agent")
205
  _agent = None
206
  return _agent
207
 
 
217
  try:
218
  os.remove(f)
219
  except Exception:
220
+ logging.exception("Failed to remove data file during clear_all_video_state")
221
 
222
+ # Reset when URL changes
223
  current_url = st.session_state.get("url", "")
224
  if current_url != st.session_state.get("last_url_value"):
225
  clear_all_video_state()
226
  st.session_state["last_url_value"] = current_url
227
 
228
+ # ---- Sidebar UI ----
229
  st.sidebar.header("Video Input")
230
  st.sidebar.text_input("Video URL", key="url", placeholder="https://")
231
 
232
  settings_exp = st.sidebar.expander("Settings", expanded=False)
233
+ model_choice = settings_exp.selectbox("Select model", options=MODEL_OPTIONS, index=MODEL_OPTIONS.index(DEFAULT_MODEL) if DEFAULT_MODEL in MODEL_OPTIONS else 0)
234
+ if model_choice == "custom":
235
+ model_input = settings_exp.text_input("Custom model id", value=DEFAULT_MODEL, key="model_input")
236
+ model_selected = model_input.strip() or DEFAULT_MODEL
237
+ else:
238
+ # keep model_input in session_state for later reads
239
+ st.session_state["model_input"] = model_choice
240
+ model_selected = model_choice
241
+
242
  settings_exp.text_input("Google API Key", key="api_key", value=os.getenv("GOOGLE_API_KEY", ""), type="password")
243
+ analysis_prompt = settings_exp.text_area("Analysis prompt", value=DEFAULT_PROMPT, height=140)
 
 
 
244
  settings_exp.text_input("Video Password (if needed)", key="video-password", placeholder="password", type="password")
 
245
 
246
+ settings_exp.number_input(
247
+ "Processing timeout (s)", min_value=60, max_value=3600,
248
+ value=st.session_state.get("processing_timeout", 900), step=30,
249
+ key="processing_timeout",
250
+ )
251
+ settings_exp.number_input(
252
+ "Generation timeout (s)", min_value=30, max_value=1800,
253
+ value=st.session_state.get("generation_timeout", 300), step=10,
254
+ key="generation_timeout",
255
+ )
256
+ settings_exp.number_input(
257
+ "Optional compression threshold (MB)", min_value=10, max_value=2000,
258
+ value=st.session_state.get("compress_threshold_mb", 200), step=10,
259
+ key="compress_threshold_mb",
260
+ )
261
+
262
  key_source = "session" if st.session_state.get("api_key") else ".env" if os.getenv("GOOGLE_API_KEY") else "none"
263
  settings_exp.caption(f"Using API key from: **{key_source}**")
 
264
  if not get_effective_api_key():
265
  settings_exp.warning("No Google API key provided; upload/generation disabled.", icon="⚠️")
266
 
 
271
  {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "OFF"},
272
  ]
273
 
274
+ # ---- Upload & processing helpers ----
275
  def upload_video_sdk(filepath: str):
276
  key = get_effective_api_key()
277
  if not key:
 
279
  if not HAS_GENAI or upload_file is None:
280
  raise RuntimeError("google.generativeai SDK not available; cannot upload")
281
  genai.configure(api_key=key)
282
+ # upload_file may return object with id or name, keep as-is
283
  return upload_file(filepath)
284
 
285
+ def wait_for_processed(file_obj, timeout: int = None):
286
+ if timeout is None:
287
+ timeout = st.session_state.get("processing_timeout", 900)
288
  if not HAS_GENAI or get_file is None:
289
  return file_obj
290
  start = time.time()
 
293
  return file_obj
294
  backoff = 1.0
295
  while True:
296
+ try:
297
+ obj = get_file(name)
298
+ except Exception as e:
299
+ if time.time() - start > timeout:
300
+ raise TimeoutError(f"Failed to fetch file status before timeout: {e}")
301
+ time.sleep(backoff)
302
+ backoff = min(backoff * 2, 8.0)
303
+ continue
304
+
305
  state = getattr(obj, "state", None)
306
  if not state or getattr(state, "name", None) != "PROCESSING":
307
  return obj
308
+
309
  if time.time() - start > timeout:
310
+ raise TimeoutError(f"File processing timed out after {int(time.time() - start)}s")
311
  time.sleep(backoff)
312
  backoff = min(backoff * 2, 8.0)
313
 
 
330
  return b_full[len(ph):].lstrip(" \n:-")
331
  return text
332
 
333
def compress_video_if_large(local_path: str, threshold_mb: int = 200):
    """Compress *local_path* when it exceeds *threshold_mb* megabytes.

    Returns a ``(path, compressed)`` tuple: the path to upload and a
    flag saying whether compression actually produced a usable file.
    Any failure is recorded in ``st.session_state["last_error"]`` and
    the original path is returned unchanged.
    """
    try:
        size_mb = os.path.getsize(local_path) / (1024 * 1024)
    except Exception as e:
        st.session_state["last_error"] = f"Failed to stat file before compression: {e}"
        return local_path, False

    if size_mb <= threshold_mb:
        # Small enough to upload as-is.
        return local_path, False

    source = Path(local_path)
    compressed_path = str(source.with_name(f"{source.stem}_compressed.mp4"))

    try:
        candidate = compress_video(local_path, compressed_path, crf=28, preset="fast")
        usable = bool(candidate) and os.path.exists(candidate) and os.path.getsize(candidate) > 0
        if usable:
            return candidate, True
        return local_path, False
    except Exception as e:
        st.session_state["last_error"] = f"Video compression failed: {e}\n{traceback.format_exc()}"
        return local_path, False
356
+
357
+ # ---- Responses API integration ----
358
def generate_via_responses_api(prompt_text: str, processed, model_used: str, max_tokens: int = 1024, timeout: int = 300):
    """Generate a summary for the uploaded *processed* file via the genai Responses API.

    Two call signatures are attempted (messages+files, then input=[...])
    because SDK versions differ. A TypeError from one signature means
    "wrong shape for this SDK version", so the next variant is tried
    instead of failing outright. Transient server errors are retried
    with exponential backoff until *timeout* seconds elapse; any other
    error is raised immediately.

    Raises RuntimeError when the key/SDK/file id is missing and
    TimeoutError when retries are exhausted.
    """
    key = get_effective_api_key()
    if not key:
        raise RuntimeError("No API key provided")
    if not HAS_GENAI or genai is None:
        raise RuntimeError("Responses API not available; install google.generativeai SDK.")
    genai.configure(api_key=key)
    fname = file_name_or_id(processed)
    if not fname:
        raise RuntimeError("Uploaded file missing name/id")

    system_msg = {"role": "system", "content": prompt_text}
    user_msg = {"role": "user", "content": "Please summarize the attached video."}

    call_variants = [
        {"messages": [system_msg, user_msg], "files": [{"name": fname}], "safety_settings": safety_settings, "max_output_tokens": max_tokens},
        {"input": [{"text": prompt_text, "files": [{"name": fname}]}], "safety_settings": safety_settings, "max_output_tokens": max_tokens},
    ]

    transient_markers = ("internal", "unavailable", "deadlineexceeded", "deadline exceeded", "timeout", "rate limit")
    last_exc = None
    start = time.time()
    backoff = 1.0
    while True:
        saw_transient = False
        for payload in call_variants:
            try:
                response = genai.responses.generate(model=model_used, **payload)
                return _normalize_genai_response(response)
            except TypeError as e:
                # This SDK version rejects this call shape — try the next variant.
                last_exc = e
                logging.warning("Responses API signature mismatch, trying next call variant: %s", e)
                continue
            except Exception as e:
                last_exc = e
                msg = str(e).lower()
                # Retry only transient/server-side errors.
                if any(k in msg for k in transient_markers):
                    saw_transient = True
                    logging.warning("Transient error from Responses API, will retry: %s", e)
                    continue
                logging.exception("Non-retryable Responses API error")
                raise
        if not saw_transient:
            # Every variant failed with a signature mismatch; retrying cannot help.
            raise last_exc
        if time.time() - start > timeout:
            raise TimeoutError(f"Responses.generate timed out after {timeout}s: last error: {last_exc}")
        time.sleep(backoff)
        backoff = min(backoff * 2, 8.0)
398
+
399
def _extract_candidate_lists(response):
    """Return every plausible list of output candidates found in *response*."""
    lists = []
    if isinstance(response, dict):
        for key in ("output", "candidates", "items", "responses", "choices"):
            val = response.get(key)
            if isinstance(val, list) and val:
                lists.append(val)
        if not lists:
            # Last resort: take the first list-valued field, whatever its name.
            for v in response.values():
                if isinstance(v, list) and v:
                    lists.append(v)
                    break
    else:
        # SDK response objects expose the same fields as attributes.
        for key in ("output", "candidates", "items", "responses", "choices"):
            val = getattr(response, key, None)
            if isinstance(val, list) and val:
                lists.append(val)
    return lists

def _collect_item_text(item, text_pieces):
    """Append any text found in a single candidate *item* to *text_pieces*."""
    if not item:
        return
    if isinstance(item, str):
        text_pieces.append(item.strip())
        return
    if isinstance(item, dict):
        for k in ("content", "text", "message", "output_text", "output"):
            value = item.get(k)
            if not value:
                continue
            if isinstance(value, str):
                text_pieces.append(value.strip())
                return
            if isinstance(value, (list, tuple)):
                # Content-part lists: dicts with 'text'/'content', bare
                # strings, or SDK part objects.
                for part in value:
                    if isinstance(part, str):
                        if part.strip():
                            text_pieces.append(part.strip())
                    elif isinstance(part, dict):
                        t = part.get("text") or part.get("content")
                        if t:
                            text_pieces.append(str(t).strip())
                    elif part is not None:
                        t = getattr(part, "text", None) or getattr(part, "content", None)
                        if t:
                            text_pieces.append(str(t).strip())
                return
            # Other truthy values: coerce rather than drop.
            text_pieces.append(str(value).strip())
            return
        return
    # SDK candidate object: look for the usual text attributes.
    t = getattr(item, "text", None) or getattr(item, "content", None)
    if t:
        text_pieces.append(str(t).strip())

def _normalize_genai_response(response):
    """Flatten a Responses API result (dict, JSON string, or SDK object)
    into a single deduplicated text string.

    Candidate lists are located under the usual keys/attributes, text is
    pulled out of each item (including nested content-part lists — the
    previous version stringified the whole list instead of descending
    into it), with a fallback to a top-level 'text'/'message' field.
    Unique pieces are joined with blank lines; "" when nothing textual
    is found.
    """
    if response is None:
        return ""

    # A non-dict response may be a JSON string; otherwise keep the object.
    if not isinstance(response, dict):
        try:
            response = json.loads(str(response))
        except Exception:
            pass

    text_pieces = []
    for lst in _extract_candidate_lists(response):
        for item in lst:
            _collect_item_text(item, text_pieces)

    if not text_pieces:
        top_text = None
        if isinstance(response, dict):
            for k in ("text", "message", "output_text"):
                if response.get(k):
                    top_text = response[k]
                    break
        else:
            top_text = getattr(response, "text", None) or getattr(response, "message", None)
        if top_text:
            text_pieces.append(str(top_text).strip())

    # Dedupe while preserving order.
    seen = set()
    filtered = []
    for t in text_pieces:
        if isinstance(t, str) and t and t not in seen:
            filtered.append(t)
            seen.add(t)
    return "\n\n".join(filtered).strip()
468
+
469
+ # ---- small helpers for safer tracebacks ----
470
def safe_traceback(max_chars=2000):
    """Return the current exception traceback, truncated to *max_chars* characters."""
    text = traceback.format_exc()
    if len(text) > max_chars:
        text = text[:max_chars] + "\n...[truncated]"
    return text
473
+
474
def scrub_api_keys(s: str) -> str:
    """Redact the active API key from *s* so it never leaks into error output."""
    if not s:
        return s
    active_key = get_effective_api_key()
    if active_key and active_key in s:
        s = s.replace(active_key, "[REDACTED_API_KEY]")
    return s
481
+
482
+ # ---- Layout ----
483
  col1, col2 = st.columns([1, 3])
484
  with col1:
485
  generate_now = st.button("Generate the story", type="primary", disabled=not bool(get_effective_api_key()))
486
  with col2:
487
+ st.write("")
488
 
489
  if st.sidebar.button("Load Video", use_container_width=True):
490
  try:
 
499
  except Exception:
500
  st.session_state["file_hash"] = None
501
  except Exception as e:
502
+ logging.exception("Failed to load video")
503
  st.sidebar.error(f"Failed to load video: {e}")
504
 
505
  if st.session_state["videos"]:
 
525
  try:
526
  file_size_mb = os.path.getsize(st.session_state["videos"]) / (1024 * 1024)
527
  st.sidebar.caption(f"File size: {file_size_mb:.1f} MB")
528
+ if file_size_mb > st.session_state.get("compress_threshold_mb", 200):
529
+ st.sidebar.warning(f"Large file detected — it will be compressed automatically before upload (>{st.session_state.get('compress_threshold_mb')} MB).", icon="⚠️")
530
  except Exception:
531
  pass
532
 
533
+ # ---- Main generation flow ----
534
  if generate_now and not st.session_state.get("busy"):
535
  if not st.session_state.get("videos"):
536
  st.error("No video loaded. Use 'Load Video' in the sidebar.")
 
545
  if HAS_GENAI and genai is not None:
546
  genai.configure(api_key=key_to_use)
547
  except Exception:
548
+ logging.exception("genai.configure failed at start")
549
 
550
+ model_id = (st.session_state.get("model_input") or model_selected or DEFAULT_MODEL).strip()
551
  if st.session_state.get("last_model") != model_id:
552
  st.session_state["last_model"] = ""
553
  maybe_create_agent(model_id)
 
559
  except Exception:
560
  current_hash = None
561
 
562
+ # determine if reupload is needed: same local path + same hash + we have uploaded/processed file id
563
  reupload_needed = True
564
+ uploaded_file = st.session_state.get("uploaded_file")
565
+ uploaded_name = file_name_or_id(uploaded_file)
566
+ if processed and st.session_state.get("last_loaded_path") == current_path and st.session_state.get("file_hash") == current_hash and uploaded_name:
567
  reupload_needed = False
568
 
569
  if reupload_needed:
570
  if not HAS_GENAI:
571
  raise RuntimeError("google.generativeai SDK not available; install it.")
572
  local_path = current_path
573
+ upload_path, compressed = compress_video_if_large(local_path, threshold_mb=st.session_state.get("compress_threshold_mb", 200))
 
 
 
 
 
574
 
575
+ with st.spinner(f"Uploading video{' (compressed)' if compressed else ''}..."):
 
576
  try:
577
+ uploaded = upload_video_sdk(upload_path)
578
+ except Exception as e:
579
+ err = scrub_api_keys(f"Upload failed: {e}\n\nTraceback:\n{safe_traceback()}")
580
+ st.session_state["last_error"] = err
581
+ st.error("Upload failed. See Last Error for details.")
582
+ raise
583
 
584
+ try:
585
+ processing_placeholder = st.empty()
586
+ processing_bar = processing_placeholder.progress(0)
587
+ start_time = time.time()
588
+ processed = wait_for_processed(uploaded, timeout=st.session_state.get("processing_timeout", 900))
589
+ # update progress once after wait (full incremental requires moving polling here)
590
+ elapsed = time.time() - start_time
591
+ timeout = st.session_state.get("processing_timeout", 900)
592
+ pct = min(100, int((elapsed / timeout) * 100)) if timeout > 0 else 0
593
+ processing_bar.progress(pct)
594
+ processing_placeholder.success("Processing complete")
595
+ except Exception as e:
596
+ err = scrub_api_keys(f"Processing failed/wait timeout: {e}\n\nTraceback:\n{safe_traceback()}")
597
+ st.session_state["last_error"] = err
598
+ st.error("Video processing failed or timed out. See Last Error.")
599
+ raise
600
 
601
+ st.session_state["uploaded_file"] = uploaded
602
+ st.session_state["processed_file"] = processed
603
+ st.session_state["last_loaded_path"] = current_path
604
+ st.session_state["file_hash"] = current_hash
605
 
606
+ prompt_text = (analysis_prompt.strip() or DEFAULT_PROMPT).strip()
607
  out = ""
608
+ model_used = model_id
609
+ max_tokens = 2048 if "2.5" in model_used else 1024
 
 
 
 
 
610
  est_tokens = max_tokens
 
611
 
612
+ # Try Agent first, fallback to Responses API
613
  agent = maybe_create_agent(model_used)
614
  debug_info = {"agent_attempted": False, "agent_ok": False, "agent_error": None, "agent_response_has_text": False}
615
  if agent:
 
618
  with st.spinner("Generating description via Agent..."):
619
  if not processed:
620
  raise RuntimeError("Processed file missing for agent generation")
 
621
  agent_response = agent.run(prompt_text, videos=[processed], safety_settings=safety_settings)
 
622
  agent_text = getattr(agent_response, "content", None) or getattr(agent_response, "outputText", None) or None
623
  if not agent_text:
 
624
  try:
625
  if isinstance(agent_response, dict):
626
+ for k in ("content", "outputText", "text", "message"):
 
627
  if k in agent_response and agent_response[k]:
628
  agent_text = agent_response[k]
629
  break
 
634
  debug_info["agent_ok"] = True
635
  debug_info["agent_response_has_text"] = True
636
  else:
 
637
  debug_info["agent_ok"] = False
638
  except Exception as ae:
 
639
  debug_info["agent_error"] = f"{ae}"
 
640
  debug_info["agent_traceback"] = traceback.format_exc()
 
641
 
642
  if not out:
 
643
  try:
644
+ with st.spinner("Generating description via Responses API..."):
645
+ out = generate_via_responses_api(prompt_text, processed, model_used, max_tokens=max_tokens, timeout=st.session_state.get("generation_timeout", 300))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
646
  except Exception as e:
 
647
  tb = traceback.format_exc()
648
+ st.session_state["last_error"] = scrub_api_keys(f"Responses API error: {e}\n\nDebug: {debug_info}\n\nTraceback:\n{safe_traceback()}")
649
  st.error("An error occurred while generating the story. You can try Generate again; the uploaded video will be reused.")
650
  out = ""
651
 
 
652
  if out:
653
  out = remove_prompt_echo(prompt_text, out)
654
  p = prompt_text
 
670
 
671
  except Exception as e:
672
  tb = traceback.format_exc()
673
+ err = scrub_api_keys(f"{e}\n\nDebug: {locals().get('debug_info', {})}\n\nTraceback:\n{safe_traceback()}")
674
+ st.session_state["last_error"] = err
675
  st.error("An error occurred while generating the story. You can try Generate again; the uploaded video will be reused.")
676
  finally:
677
  st.session_state["busy"] = False