CB committed on
Commit
44ae25c
·
verified ·
1 Parent(s): b0626f0

Update streamlit_app.py

Browse files
Files changed (1) hide show
  1. streamlit_app.py +289 -308
streamlit_app.py CHANGED
@@ -1,12 +1,9 @@
1
  # streamlit_app.py
2
  import os
3
  import time
4
- import json
5
- import string
6
  from glob import glob
7
  from pathlib import Path
8
- import hashlib
9
- from difflib import SequenceMatcher
10
  from tempfile import NamedTemporaryFile
11
 
12
  import yt_dlp
@@ -16,44 +13,29 @@ from dotenv import load_dotenv
16
 
17
  load_dotenv()
18
 
19
- try:
20
- from phi.agent import Agent
21
- from phi.model.google import Gemini
22
- from phi.tools.duckduckgo import DuckDuckGo
23
- HAS_PHI = True
24
- except Exception:
25
- Agent = Gemini = DuckDuckGo = None
26
- HAS_PHI = False
27
-
28
- try:
29
- import google.generativeai as genai
30
- from google.generativeai import upload_file, get_file # type: ignore
31
- HAS_GENAI = True
32
- except Exception:
33
- genai = None
34
- upload_file = get_file = None
35
- HAS_GENAI = False
36
-
37
  st.set_page_config(page_title="Generate the story of videos", layout="wide")
38
  DATA_DIR = Path("./data")
39
  DATA_DIR.mkdir(exist_ok=True)
40
 
41
- # Session state defaults
42
- st.session_state.setdefault("videos", "")
43
- st.session_state.setdefault("loop_video", False)
44
- st.session_state.setdefault("uploaded_file", None)
45
- st.session_state.setdefault("processed_file", None)
46
- st.session_state.setdefault("busy", False)
47
- st.session_state.setdefault("last_loaded_path", "")
48
- st.session_state.setdefault("analysis_out", "")
49
- st.session_state.setdefault("last_error", "")
50
- st.session_state.setdefault("file_hash", None)
51
- st.session_state.setdefault("fast_mode", False)
52
-
53
- # Helpers
 
 
 
 
54
  def sanitize_filename(path_str: str):
55
- name = Path(path_str).name
56
- return name.lower().translate(str.maketrans("", "", string.punctuation)).replace(" ", "_")
57
 
58
  def file_sha256(path: str, block_size: int = 65536) -> str:
59
  h = hashlib.sha256()
@@ -73,32 +55,31 @@ def safe_ffmpeg_run(stream_cmd):
73
  return False, str(e)
74
 
75
  def convert_video_to_mp4(video_path: str) -> str:
76
- target_path = Path(video_path).with_suffix(".mp4")
77
- if target_path.exists():
78
- return str(target_path)
79
- tmp = NamedTemporaryFile(prefix=target_path.stem + "_", suffix=".mp4", delete=False, dir=target_path.parent)
80
  tmp.close()
81
- success, err = safe_ffmpeg_run(ffmpeg.input(video_path).output(str(tmp.name)))
82
- if not success:
83
  try:
84
  os.remove(tmp.name)
85
  except Exception:
86
  pass
87
- raise RuntimeError(f"ffmpeg conversion failed: {err}")
88
- os.replace(tmp.name, str(target_path))
89
- # optional: remove original if different extension
90
  if Path(video_path).suffix.lower() != ".mp4":
91
  try:
92
  os.remove(video_path)
93
  except Exception:
94
  pass
95
- return str(target_path)
96
 
97
  def compress_video(input_path: str, target_path: str, crf: int = 28, preset: str = "fast"):
98
  tmp = NamedTemporaryFile(prefix=Path(target_path).stem + "_", suffix=".mp4", delete=False, dir=Path(target_path).parent)
99
  tmp.close()
100
- success, err = safe_ffmpeg_run(ffmpeg.input(input_path).output(str(tmp.name), vcodec="libx264", crf=crf, preset=preset))
101
- if not success:
102
  try:
103
  os.remove(tmp.name)
104
  except Exception:
@@ -111,10 +92,10 @@ def download_video_ytdlp(url: str, save_dir: str, video_password: str = None) ->
111
  if not url:
112
  raise ValueError("No URL provided")
113
  outtmpl = str(Path(save_dir) / "%(id)s.%(ext)s")
114
- ydl_opts = {"outtmpl": outtmpl, "format": "best"}
115
  if video_password:
116
- ydl_opts["videopassword"] = video_password
117
- with yt_dlp.YoutubeDL(ydl_opts) as ydl:
118
  info = ydl.extract_info(url, download=True)
119
  video_id = info.get("id") if isinstance(info, dict) else None
120
  if video_id:
@@ -126,226 +107,217 @@ def download_video_ytdlp(url: str, save_dir: str, video_password: str = None) ->
126
  return convert_video_to_mp4(matches[0])
127
 
128
  def file_name_or_id(file_obj):
129
- if file_obj is None:
130
  return None
131
- # dict-like
132
  if isinstance(file_obj, dict):
133
- for key in ("name", "id", "fileId", "file_id"):
134
  val = file_obj.get(key)
135
  if val:
136
  s = str(val)
 
 
 
137
  if s.startswith("files/"):
138
  return s
139
- # if id-like (12 chars) return files/{id}
140
- if len(s) == 12 and "/" not in s:
141
  return f"files/{s}"
142
  return s
143
  uri = file_obj.get("uri") or file_obj.get("url")
144
  if uri:
145
- tail = uri.rstrip("/").split("/")[-1]
146
- if tail:
147
- return tail if tail.startswith("files/") else f"files/{tail}"
148
  return None
149
- # object-like (SDK)
150
- for attr in ("name", "id", "fileId", "file_id", "uri"):
151
  val = getattr(file_obj, attr, None)
152
  if val:
153
  s = str(val)
 
 
 
154
  if s.startswith("files/"):
155
  return s
156
- if len(s) == 12 and "/" not in s:
157
  return f"files/{s}"
158
  return s
159
- # last resort: parse string
160
  s = str(file_obj)
 
 
 
161
  if "files/" in s:
162
  idx = s.find("files/")
163
  return s[idx:] if s[idx:].startswith("files/") else f"files/{s[idx+6:]}"
164
  return None
165
 
166
- # Configure Google SDK if key present
167
- if os.getenv("GOOGLE_API_KEY") and HAS_GENAI:
 
 
 
 
 
168
  try:
169
- genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
 
 
 
 
 
 
170
  except Exception:
171
- pass
172
 
173
- # UI: Sidebar inputs
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
174
  st.sidebar.header("Video Input")
175
  st.sidebar.text_input("Video URL", key="url", placeholder="https://")
 
176
 
177
- settings_exp = st.sidebar.expander("Settings", expanded=False)
178
- env_api_key = os.getenv("GOOGLE_API_KEY", "")
179
- API_KEY = settings_exp.text_input("Google API Key", value=env_api_key, placeholder="Set GOOGLE_API_KEY in .env or enter here", type="password")
180
- model_input = settings_exp.text_input("Gemini Model (short name)", "gemini-2.0-flash-lite")
181
  model_id = model_input.strip() or "gemini-2.0-flash-lite"
182
  model_arg = model_id if not model_id.startswith("models/") else model_id.split("/", 1)[1]
183
- analysis_prompt = settings_exp.text_area("Enter analysis", value="watch entire video and describe", height=120)
184
- settings_exp.text_input("Video Password (if needed)", key="video-password", placeholder="password", type="password")
185
- settings_exp.checkbox("Fast mode (skip compression, smaller model, fewer tokens)", key="fast_mode")
186
- settings_exp.number_input("Max output tokens", key="max_output_tokens", value=1024, min_value=128, max_value=8192, step=128)
187
-
188
- if not API_KEY and not os.getenv("GOOGLE_API_KEY"):
189
- settings_exp.warning("No Google API key provided; upload/generation disabled.", icon="⚠️")
190
-
191
- safety_settings = [
192
- {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "OFF"},
193
- {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "OFF"},
194
- {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "OFF"},
195
- {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "OFF"},
196
- ]
197
-
198
- # Build Agent if available
199
- _agent = None
200
- if HAS_PHI and HAS_GENAI and (API_KEY or os.getenv("GOOGLE_API_KEY")):
201
- try:
202
- key_to_use = API_KEY if API_KEY else os.getenv("GOOGLE_API_KEY")
203
- genai.configure(api_key=key_to_use)
204
- _agent = Agent(name="Video AI summarizer", model=Gemini(id=model_id), tools=[DuckDuckGo()], markdown=True)
205
- except Exception:
206
- _agent = None
207
-
208
- def clear_all_video_state():
209
- st.session_state.pop("uploaded_file", None)
210
- st.session_state.pop("processed_file", None)
211
- st.session_state["videos"] = ""
212
- st.session_state["last_loaded_path"] = ""
213
- st.session_state["analysis_out"] = ""
214
- st.session_state["last_error"] = ""
215
- st.session_state["file_hash"] = None
216
- for f in glob(str(DATA_DIR / "*")):
217
- try:
218
- os.remove(f)
219
- except Exception:
220
- pass
221
 
222
- # Track URL changes
223
- if "last_url_value" not in st.session_state:
224
- st.session_state["last_url_value"] = st.session_state.get("url", "")
225
- current_url = st.session_state.get("url", "")
226
- if current_url != st.session_state.get("last_url_value"):
227
- clear_all_video_state()
228
- st.session_state["last_url_value"] = current_url
 
 
 
 
 
 
 
 
 
229
 
230
- # Load video button
231
  if st.sidebar.button("Load Video", use_container_width=True):
232
  try:
233
  vpw = st.session_state.get("video-password", "")
234
  path = download_video_ytdlp(st.session_state.get("url", ""), str(DATA_DIR), vpw)
235
  st.session_state["videos"] = path
236
  st.session_state["last_loaded_path"] = path
237
- st.session_state.pop("uploaded_file", None)
238
- st.session_state.pop("processed_file", None)
239
  st.session_state["file_hash"] = file_sha256(path)
240
  except Exception as e:
241
- st.sidebar.error(f"Failed to load video: {e}")
242
 
243
- # Sidebar preview & options
244
  if st.session_state["videos"]:
245
  try:
246
  st.sidebar.video(st.session_state["videos"], loop=st.session_state.get("loop_video", False))
247
  except Exception:
248
  st.sidebar.write("Couldn't preview video")
249
-
250
  with st.sidebar.expander("Options", expanded=False):
251
  loop_checkbox = st.checkbox("Enable Loop", value=st.session_state.get("loop_video", False))
252
  st.session_state["loop_video"] = loop_checkbox
253
 
254
  if st.button("Clear Video(s)"):
255
- clear_all_video_state()
 
 
 
 
 
 
 
 
 
 
 
256
 
257
  try:
258
  with open(st.session_state["videos"], "rb") as vf:
259
  st.download_button("Download Video", data=vf, file_name=sanitize_filename(st.session_state["videos"]), mime="video/mp4", use_container_width=True)
260
  except Exception:
261
- st.sidebar.error("Failed to prepare download")
262
-
263
  st.sidebar.write("Title:", Path(st.session_state["videos"]).name)
264
 
265
- # Upload helpers
266
- def upload_video_sdk(filepath: str):
267
- key = API_KEY if API_KEY else os.getenv("GOOGLE_API_KEY")
268
- if not key:
269
- raise RuntimeError("No API key provided")
270
- if not HAS_GENAI or upload_file is None:
271
- raise RuntimeError("google.generativeai SDK not available; cannot upload")
272
- genai.configure(api_key=key)
273
- return upload_file(filepath)
274
-
275
- def wait_for_processed(file_obj, timeout=180):
276
- if not HAS_GENAI or get_file is None:
277
- return file_obj
278
- start = time.time()
279
- name = file_name_or_id(file_obj)
280
- if not name:
281
- return file_obj
282
- backoff = 1.0
283
- while True:
284
- obj = get_file(name)
285
- state = getattr(obj, "state", None)
286
- if not state or getattr(state, "name", None) != "PROCESSING":
287
- return obj
288
- if time.time() - start > timeout:
289
- raise TimeoutError("File processing timed out")
290
- time.sleep(backoff)
291
- backoff = min(backoff * 2, 8.0)
292
-
293
- # Enhanced remove_prompt_echo function
294
- def remove_prompt_echo(prompt: str, text: str, check_len: int = 600, ratio_threshold: float = 0.68):
295
- if not prompt or not text:
296
- return text
297
-
298
- # Normalize the prompt and text
299
- a = " ".join(prompt.strip().lower().split())
300
- b_full = text.strip()
301
- b = " ".join(b_full[:check_len].lower().split())
302
-
303
- # Calculate the similarity ratio
304
- ratio = SequenceMatcher(None, a, b).ratio()
305
-
306
- # If the ratio is high, remove the approximate prefix
307
- if ratio >= ratio_threshold:
308
- cut = min(len(b_full), max(int(len(prompt) * 0.9), len(a)))
309
- new_text = b_full[cut:].lstrip(" \n:-")
310
- if len(new_text) >= 3:
311
- return new_text
312
-
313
- # Remove common placeholder prefixes
314
- placeholders = ["enter analysis", "enter your analysis", "enter analysis here", "please enter analysis"]
315
- low = b_full.strip().lower()
316
- for ph in placeholders:
317
- if low.startswith(ph):
318
- return b_full[len(ph):].lstrip(" \n:-")
319
-
320
- return text
321
-
322
- # Main UI layout
323
  col1, col2 = st.columns([1, 3])
324
  with col1:
325
  if st.session_state.get("busy"):
326
- st.button("Generate the story", disabled=True)
 
 
 
327
  else:
328
  generate_now = st.button("Generate the story", type="primary")
329
  with col2:
330
  pass
331
 
332
- # Generation flow
 
 
 
 
 
 
 
333
  if (st.session_state.get("busy") is False) and ('generate_now' in locals() and generate_now):
334
  if not st.session_state.get("videos"):
335
  st.error("No video loaded. Use 'Load Video' in the sidebar.")
336
  else:
337
- key_to_use = API_KEY if API_KEY else os.getenv("GOOGLE_API_KEY")
338
- if not key_to_use:
339
- st.error("Google API key not set.")
340
  else:
341
  try:
342
  st.session_state["busy"] = True
343
  processed = st.session_state.get("processed_file")
344
- # Use file hash to determine if we must re-upload
345
  current_path = st.session_state.get("videos")
346
- current_hash = None
347
  try:
348
- current_hash = file_sha256(current_path) if current_path and os.path.exists(current_path) else None
349
  except Exception:
350
  current_hash = None
351
 
@@ -355,9 +327,8 @@ if (st.session_state.get("busy") is False) and ('generate_now' in locals() and g
355
 
356
  if reupload_needed:
357
  if not HAS_GENAI:
358
- raise RuntimeError("google.generativeai SDK not available; install it.")
359
  local_path = current_path
360
- # Fast mode overrides compression behavior
361
  fast_mode = st.session_state.get("fast_mode", False)
362
  upload_path = local_path
363
  try:
@@ -365,16 +336,16 @@ if (st.session_state.get("busy") is False) and ('generate_now' in locals() and g
365
  except Exception:
366
  file_size_mb = 0
367
 
368
- # Only compress if large and not in fast mode
369
- if not fast_mode and file_size_mb > 50:
370
  compressed_path = str(Path(local_path).with_name(Path(local_path).stem + "_compressed.mp4"))
371
  try:
372
- # Use faster preset when focusing on speed
373
  preset = "veryfast" if fast_mode else "fast"
374
  upload_path = compress_video(local_path, compressed_path, crf=28, preset=preset)
375
  except Exception:
376
  upload_path = local_path
377
 
 
378
  with st.spinner("Uploading video..."):
379
  uploaded = upload_video_sdk(upload_path)
380
  processed = wait_for_processed(uploaded, timeout=180)
@@ -383,158 +354,168 @@ if (st.session_state.get("busy") is False) and ('generate_now' in locals() and g
383
  st.session_state["last_loaded_path"] = current_path
384
  st.session_state["file_hash"] = current_hash
385
 
386
- prompt_text = (analysis_prompt.strip() or "Describe this video in vivid detail.").strip()
 
 
 
 
 
 
 
 
387
 
388
- out = ""
389
- # Use lighter model/tokens in fast mode
390
  if st.session_state.get("fast_mode"):
391
- model_used = model_arg if model_arg else "gemini-2.0-flash-lite"
392
  max_tokens = min(st.session_state.get("max_output_tokens", 512), 1024)
393
  else:
394
  model_used = model_arg
395
  max_tokens = st.session_state.get("max_output_tokens", 1024)
396
 
397
- # Prepare concise messages
398
  system_msg = {"role": "system", "content": "You are a helpful assistant that summarizes videos concisely in vivid detail."}
399
  user_msg = {"role": "user", "content": prompt_text}
400
 
401
- debug_info = {"request": None, "response": None, "fname": None}
402
-
403
- def call_responses_once(model_used, system_msg, user_msg, fname, max_tokens, safety_settings):
404
- genai.configure(api_key=key_to_use)
405
  try:
406
  response = genai.responses.generate(
407
  model=model_used,
408
  messages=[system_msg, user_msg],
409
  files=[{"name": fname}],
410
- safety_settings=safety_settings,
411
  max_output_tokens=max_tokens,
412
  )
413
  except TypeError:
414
  response = genai.responses.generate(
415
  model=model_used,
416
  input=[{"text": user_msg["content"], "files": [{"name": fname}]}],
417
- safety_settings=safety_settings,
418
  max_output_tokens=max_tokens,
419
  )
420
  return response
421
 
422
- if _agent:
423
- with st.spinner("Generating description via Agent..."):
424
- response = _agent.run(system_msg["content"] + "\n\n" + user_msg["content"], videos=[processed], safety_settings=safety_settings)
425
- out = getattr(response, "content", None) or getattr(response, "outputText", None) or str(response)
426
- debug_info["response"] = response
427
- else:
428
- if not HAS_GENAI or genai is None:
429
- raise RuntimeError("Responses API not available; install google.generativeai SDK.")
430
- fname = file_name_or_id(processed)
431
- if not fname:
432
- fname = file_name_or_id(st.session_state.get("uploaded_file"))
433
- if not fname:
434
- raise RuntimeError("Uploaded file missing name/id")
435
- debug_info["fname"] = fname
436
-
437
- # Make the request and retry once if no output
438
- response = call_responses_once(model_used, system_msg, user_msg, fname, max_tokens, safety_settings)
439
- debug_info["response"] = response
440
-
441
- def extract_text_from_response(response):
442
- outputs = getattr(response, "output", None) or (response.get("output") if isinstance(response, dict) else None) or []
443
- if not outputs and isinstance(response, dict):
444
- outputs = response.get("output", [])
445
- text_pieces = []
446
- for item in outputs or []:
447
- contents = getattr(item, "content", None) or (item.get("content") if isinstance(item, dict) else None) or []
448
- for c in contents:
449
- ctype = getattr(c, "type", None) or (c.get("type") if isinstance(c, dict) else None)
450
- if ctype in ("output_text", "text") or ctype is None:
451
- txt = getattr(c, "text", None) or (c.get("text") if isinstance(c, dict) else None)
452
- if txt:
453
- text_pieces.append(txt)
454
- if not text_pieces:
455
- top_text = getattr(response, "text", None) or (response.get("text") if isinstance(response, dict) else None)
456
- if top_text:
457
- text_pieces.append(top_text)
458
- seen = set()
459
- filtered = []
460
- for t in text_pieces:
461
- if t not in seen:
462
- filtered.append(t)
463
- seen.add(t)
464
- return "\n\n".join(filtered)
465
-
466
- out = extract_text_from_response(response)
467
-
468
- # Inspect metrics to detect zero-output
469
- meta = getattr(response, "metrics", None) or (response.get("metrics") if isinstance(response, dict) else None) or {}
 
470
  output_tokens = 0
 
 
 
471
  try:
472
- if isinstance(meta, dict):
473
- output_tokens = meta.get("output_tokens", 0) or meta.get("output_tokens", 0)
 
 
474
  else:
475
- output_tokens = getattr(response, "metrics", {}).get("output_tokens", 0)
 
 
 
476
  except Exception:
477
- output_tokens = 0
478
 
479
- if (not out or output_tokens == 0) and model_used:
480
- retry_prompt = "Summarize the video content briefly and vividly (2-4 paragraphs)."
481
- user_msg_retry = {"role": "user", "content": retry_prompt}
482
- retry_max = min(max_tokens * 2, 4096)
483
- try:
484
- response2 = call_responses_once(model_used, system_msg, user_msg_retry, fname, retry_max, safety_settings)
485
- debug_info["response_retry"] = response2
486
- out2 = extract_text_from_response(response2)
487
- if out2 and len(out2) > len(out):
488
- out = out2
489
- except Exception as e:
490
- debug_info["retry_error"] = str(e)
491
-
492
- # Remove prompt echo robustly
493
  if out:
494
- out = remove_prompt_echo(prompt_text, out)
495
- p = prompt_text
496
- if p and out.strip().lower().startswith(p.lower()):
497
- out = out.strip()[len(p):].lstrip(" \n:-")
498
- placeholders = ["enter analysis", "enter your analysis", "enter analysis here", "please enter analysis"]
499
- low = out.strip().lower()
500
- for ph in placeholders:
501
- if low.startswith(ph):
502
- out = out.strip()[len(ph):].lstrip(" \n:-")
503
- break
504
- out = out.strip()
505
-
506
- st.session_state["analysis_out"] = out
507
  st.session_state["last_error"] = ""
 
508
  st.subheader("Analysis Result")
509
  st.markdown(out or "_(no text returned)_")
510
 
511
- # Debugging expander
512
- with st.expander("Debug: request/response", expanded=False):
513
- st.write("model_used:", model_used)
514
- st.write("fname:", debug_info.get("fname"))
515
- st.write("system_msg:", system_msg)
516
- st.write("user_msg:", user_msg)
517
- st.write("response (raw):")
518
- st.write(debug_info.get("response"))
519
- if debug_info.get("response_retry"):
520
- st.write("response (retry):")
521
- st.write(debug_info.get("response_retry"))
522
- if debug_info.get("retry_error"):
523
- st.write("retry_error:", debug_info.get("retry_error"))
524
 
525
  except Exception as e:
526
- st.session_state["last_error"] = str(e)
527
- st.error("An error occurred while generating the story. You can try Generate again; the uploaded video will be reused.")
528
  finally:
529
  st.session_state["busy"] = False
530
 
531
- # Display cached analysis if available (avoid duplicate on same run)
532
  if st.session_state.get("analysis_out"):
533
- just_loaded_same = (st.session_state.get("last_loaded_path") == st.session_state.get("videos"))
534
- if not just_loaded_same:
535
- st.subheader("Analysis Result")
536
- st.markdown(st.session_state.get("analysis_out"))
537
 
538
  if st.session_state.get("last_error"):
539
  with st.expander("Last Error", expanded=False):
540
  st.write(st.session_state.get("last_error"))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  # streamlit_app.py
2
  import os
3
  import time
4
+ import hashlib
 
5
  from glob import glob
6
  from pathlib import Path
 
 
7
  from tempfile import NamedTemporaryFile
8
 
9
  import yt_dlp
 
13
 
14
  load_dotenv()
15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  st.set_page_config(page_title="Generate the story of videos", layout="wide")
17
  DATA_DIR = Path("./data")
18
  DATA_DIR.mkdir(exist_ok=True)
19
 
20
+ # session defaults
21
+ for k, v in {
22
+ "videos": "",
23
+ "loop_video": False,
24
+ "uploaded_file": None,
25
+ "processed_file": None,
26
+ "busy": False,
27
+ "last_loaded_path": "",
28
+ "analysis_out": "",
29
+ "last_error": "",
30
+ "file_hash": None,
31
+ "fast_mode": False,
32
+ "use_compression": True,
33
+ }.items():
34
+ st.session_state.setdefault(k, v)
35
+
36
+ # helpers
37
def sanitize_filename(path_str: str):
    """Return a download-safe file name derived from the last component of *path_str*.

    The name is lower-cased, a fixed set of punctuation characters is deleted,
    and spaces become underscores. Dots, dashes and underscores are not in the
    deleted set, so the file extension is kept.
    """
    unwanted = "!?\"'`~@#$%^&*()[]{}<>:,;\\/|+=*"
    deletion_table = str.maketrans("", "", unwanted)
    base_name = Path(path_str).name.lower()
    cleaned = base_name.translate(deletion_table)
    return cleaned.replace(" ", "_")
 
39
 
40
  def file_sha256(path: str, block_size: int = 65536) -> str:
41
  h = hashlib.sha256()
 
55
  return False, str(e)
56
 
57
def convert_video_to_mp4(video_path: str) -> str:
    """Convert *video_path* to an ``.mp4`` file next to it and return the new path.

    If a file with the same stem and an ``.mp4`` suffix already exists it is
    returned as-is and nothing is converted. Otherwise ffmpeg writes into a
    temporary file in the destination directory, which is moved into place
    only after a successful run.

    Raises:
        RuntimeError: if ``safe_ffmpeg_run`` reports failure.
    """
    source = Path(video_path)
    destination = source.with_suffix(".mp4")
    if destination.exists():
        return str(destination)

    # Reserve a unique scratch name beside the destination; ffmpeg writes there.
    scratch = NamedTemporaryFile(
        prefix=destination.stem + "_",
        suffix=".mp4",
        delete=False,
        dir=destination.parent,
    )
    scratch.close()

    succeeded, _err = safe_ffmpeg_run(ffmpeg.input(video_path).output(str(scratch.name)))
    if not succeeded:
        # Best-effort cleanup of the partial output before reporting failure.
        try:
            os.remove(scratch.name)
        except Exception:
            pass
        raise RuntimeError("ffmpeg conversion failed")

    os.replace(scratch.name, str(destination))

    # The source is only deleted when it was not already an .mp4 file.
    if source.suffix.lower() != ".mp4":
        try:
            os.remove(video_path)
        except Exception:
            pass
    return str(destination)
77
 
78
  def compress_video(input_path: str, target_path: str, crf: int = 28, preset: str = "fast"):
79
  tmp = NamedTemporaryFile(prefix=Path(target_path).stem + "_", suffix=".mp4", delete=False, dir=Path(target_path).parent)
80
  tmp.close()
81
+ ok, err = safe_ffmpeg_run(ffmpeg.input(input_path).output(str(tmp.name), vcodec="libx264", crf=crf, preset=preset))
82
+ if not ok:
83
  try:
84
  os.remove(tmp.name)
85
  except Exception:
 
92
  if not url:
93
  raise ValueError("No URL provided")
94
  outtmpl = str(Path(save_dir) / "%(id)s.%(ext)s")
95
+ opts = {"outtmpl": outtmpl, "format": "best"}
96
  if video_password:
97
+ opts["videopassword"] = video_password
98
+ with yt_dlp.YoutubeDL(opts) as ydl:
99
  info = ydl.extract_info(url, download=True)
100
  video_id = info.get("id") if isinstance(info, dict) else None
101
  if video_id:
 
107
  return convert_video_to_mp4(matches[0])
108
 
109
# Keys/attributes probed, in priority order, for a file reference.
_FILE_REF_KEYS = ("name", "id", "fileId", "file_id", "uri", "url")


def _normalize_file_ref(value):
    """Coerce a single name/id/URI value into ``files/<id>`` form where possible."""
    ref = str(value)
    if ref.startswith(("http://", "https://")):
        # The id is taken from the last path segment of the URI.
        return "files/" + ref.rstrip("/").rsplit("/", 1)[-1]
    if ref.startswith("files/"):
        return ref
    # A bare token of plausible length is treated as an id without its prefix.
    if "/" not in ref and 6 <= len(ref) <= 128:
        return f"files/{ref}"
    return ref


def file_name_or_id(file_obj):
    """Extract a ``files/<id>`` style reference from an uploaded-file handle.

    Args:
        file_obj: a dict, an object exposing any of ``name``/``id``/``fileId``/
            ``file_id``/``uri``/``url``, or anything whose ``str()`` contains a
            reference.

    Returns:
        The normalised reference string, or ``None`` when *file_obj* is falsy
        or no reference can be found.
    """
    import re  # function-scope import keeps this block self-contained

    if not file_obj:
        return None

    is_mapping = isinstance(file_obj, dict)
    for key in _FILE_REF_KEYS:
        value = file_obj.get(key) if is_mapping else getattr(file_obj, key, None)
        if value:
            return _normalize_file_ref(value)
    if is_mapping:
        return None

    # Last resort: dig a reference out of the string form (e.g. a repr).
    text = str(file_obj)
    # Prefer an explicit "files/<id>" token so trailing repr punctuation such
    # as "')" never ends up inside the returned reference.
    found = re.search(r"files/[A-Za-z0-9_-]+", text)
    if found:
        return found.group(0)
    if "http://" in text or "https://" in text:
        return "files/" + text.rstrip("/").rsplit("/", 1)[-1]
    if "files/" in text:
        return text[text.find("files/"):]
    return None
150
 
151
+ # optional Google SDK
152
+ HAS_GENAI = False
153
+ genai = None
154
+ upload_file = None
155
+ get_file = None
156
+ delete_file = None
157
+ if os.getenv("GOOGLE_API_KEY"):
158
  try:
159
+ import google.generativeai as genai_mod
160
+ genai = genai_mod
161
+ upload_file = genai_mod.upload_file
162
+ get_file = genai_mod.get_file
163
+ # delete_file may not exist in SDK; guard later
164
+ delete_file = getattr(genai_mod, "delete_file", None)
165
+ HAS_GENAI = True
166
  except Exception:
167
+ HAS_GENAI = False
168
 
169
def upload_video_sdk(filepath: str):
    """Upload *filepath* through the module-level ``upload_file`` SDK binding.

    The API key is read from the ``GOOGLE_API_KEY`` environment variable and
    passed to ``genai.configure`` before the upload.

    Raises:
        RuntimeError: if the environment variable is unset/empty, or the SDK
            was not imported (``HAS_GENAI`` is false).
    """
    api_key = os.getenv("GOOGLE_API_KEY")

    problem = None
    if not api_key:
        problem = "No API key"
    elif not HAS_GENAI:
        problem = "google.generativeai SDK not available"
    if problem is not None:
        raise RuntimeError(problem)

    genai.configure(api_key=api_key)
    return upload_file(filepath)
177
+
178
def wait_for_processed(file_obj, timeout=180):
    """Poll an uploaded file until its state is no longer ``PROCESSING``.

    Args:
        file_obj: handle returned by the upload call.
        timeout: maximum number of seconds to keep polling.

    Returns:
        The latest object returned by ``get_file`` once its ``state.name`` is
        anything other than ``"PROCESSING"`` (or it has no truthy ``state``).
        *file_obj* is returned unchanged when the SDK is unavailable or no
        reference can be derived from it.

    Raises:
        TimeoutError: if the file is still processing when *timeout* elapses.
    """
    if not HAS_GENAI or get_file is None:
        return file_obj
    name = file_name_or_id(file_obj)
    if not name:
        return file_obj

    # Monotonic clock: a wall-clock adjustment must not shorten or extend the wait.
    deadline = time.monotonic() + timeout
    backoff = 1.0
    while True:
        obj = get_file(name)
        state = getattr(obj, "state", None)
        # NOTE(review): every non-PROCESSING state is handed back as-is, which
        # presumably includes failure states - confirm callers check for that.
        if not state or getattr(state, "name", None) != "PROCESSING":
            return obj
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            raise TimeoutError("File processing timed out")
        # Never sleep past the deadline; back-off doubles up to 8 seconds.
        time.sleep(min(backoff, remaining))
        backoff = min(backoff * 2, 8.0)
195
+
196
def remove_prompt_echo(prompt: str, text: str):
    """Strip a leading echo of *prompt*, or a known placeholder phrase, from *text*.

    The whitespace-normalised, lower-cased prompt is compared with the first
    600 characters of the stripped text. When the similarity ratio exceeds
    0.7, roughly one prompt-length of characters is cut from the front. If
    that leaves fewer than three characters, or there is no echo, the
    placeholder phrases are tried instead.

    Args:
        prompt: the prompt that was sent to the model.
        text: the model output.

    Returns:
        The cleaned text, or *text* unchanged when nothing was removed or
        either argument is empty.
    """
    if not prompt or not text:
        return text

    from difflib import SequenceMatcher

    norm_prompt = " ".join(prompt.strip().lower().split())
    body = text.strip()
    head = " ".join(body[:600].lower().split())

    # autojunk=False: for sequences of 200+ items the default heuristic ignores
    # "popular" characters, which can collapse the ratio for a genuine echo.
    similarity = SequenceMatcher(None, norm_prompt, head, autojunk=False).ratio()
    if similarity > 0.7:
        cut = min(len(body), max(int(len(prompt) * 0.9), len(norm_prompt)))
        remainder = body[cut:].lstrip(" \n:-")
        if len(remainder) >= 3:
            return remainder

    # Longest phrases first, so "enter analysis here" is not cut short by the
    # shorter "enter analysis" (which would leave "here" behind).
    placeholders = (
        "please enter analysis",
        "enter your analysis",
        "enter analysis here",
        "enter analysis",
    )
    lowered = body.lower()
    for phrase in placeholders:
        if lowered.startswith(phrase):
            return body[len(phrase):].lstrip(" \n:-")
    return text
214
+
215
+ # UI: sidebar
216
  st.sidebar.header("Video Input")
217
  st.sidebar.text_input("Video URL", key="url", placeholder="https://")
218
+ settings = st.sidebar.expander("Settings", expanded=False)
219
 
220
+ env_key = os.getenv("GOOGLE_API_KEY", "")
221
+ API_KEY_INPUT = settings.text_input("Google API Key (one-time)", value="", type="password", help="Provide only if you want uploads/generation. Not stored in session.")
222
+ model_input = settings.text_input("Gemini Model (short name)", "gemini-2.0-flash-lite")
 
223
  model_id = model_input.strip() or "gemini-2.0-flash-lite"
224
  model_arg = model_id if not model_id.startswith("models/") else model_id.split("/", 1)[1]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
225
 
226
+ default_prompt = (
227
+ "You are an Indoor Human Behavior Analyzer. Watch the video and produce a detailed, evidence‑based behavioral report focused on human actions, "
228
+ "interactions, posture, movement, anatomy, and apparent intent. Use vivid, anatomically rich language and avoid moralizing. Prefer short paragraphs and numeric estimates "
229
+ "for anatomical measurements. Provide sensory, subjective descriptions and vivid imagery, including a concise summary of observed actions and a description of behaviors "
230
+ "and interaction dynamics. Use the following personality‑traits list when inferring dispositions: driven by an insatiable desire to understand human behavior and anatomy. "
231
+ "Finish with a short feedback and recommendations section. Adopt a playful, anatomically obsessed, slightly mischievous persona — inquisitive, pragmatic, and vivid in description."
232
+ )
233
+
234
+ analysis_prompt = settings.text_area("Enter analysis", value=default_prompt, height=300)
235
+ settings.text_input("Video Password (if needed)", key="video-password", type="password")
236
+ settings.checkbox("Fast mode (skip compression, smaller model, fewer tokens)", key="fast_mode")
237
+ settings.checkbox("Enable compression for large files (>50MB)", value=True, key="use_compression")
238
+ settings.number_input("Max output tokens", key="max_output_tokens", value=1024, min_value=128, max_value=8192, step=128)
239
+
240
+ if not API_KEY_INPUT and not env_key:
241
+ settings.info("No Google API key provided; upload/generation disabled. Use local-only demos or provide key for real analysis.", icon="ℹ️")
242
 
243
+ # load video
244
  if st.sidebar.button("Load Video", use_container_width=True):
245
  try:
246
  vpw = st.session_state.get("video-password", "")
247
  path = download_video_ytdlp(st.session_state.get("url", ""), str(DATA_DIR), vpw)
248
  st.session_state["videos"] = path
249
  st.session_state["last_loaded_path"] = path
250
+ st.session_state["uploaded_file"] = None
251
+ st.session_state["processed_file"] = None
252
  st.session_state["file_hash"] = file_sha256(path)
253
  except Exception as e:
254
+ st.sidebar.error("Failed to load video")
255
 
 
256
  if st.session_state["videos"]:
257
  try:
258
  st.sidebar.video(st.session_state["videos"], loop=st.session_state.get("loop_video", False))
259
  except Exception:
260
  st.sidebar.write("Couldn't preview video")
 
261
  with st.sidebar.expander("Options", expanded=False):
262
  loop_checkbox = st.checkbox("Enable Loop", value=st.session_state.get("loop_video", False))
263
  st.session_state["loop_video"] = loop_checkbox
264
 
265
  if st.button("Clear Video(s)"):
266
+ for f in glob(str(DATA_DIR / "*")):
267
+ try:
268
+ os.remove(f)
269
+ except Exception:
270
+ pass
271
+ for k in ("uploaded_file", "processed_file"):
272
+ st.session_state.pop(k, None)
273
+ st.session_state["videos"] = ""
274
+ st.session_state["last_loaded_path"] = ""
275
+ st.session_state["analysis_out"] = ""
276
+ st.session_state["last_error"] = ""
277
+ st.session_state["file_hash"] = None
278
 
279
  try:
280
  with open(st.session_state["videos"], "rb") as vf:
281
  st.download_button("Download Video", data=vf, file_name=sanitize_filename(st.session_state["videos"]), mime="video/mp4", use_container_width=True)
282
  except Exception:
283
+ pass
 
284
  st.sidebar.write("Title:", Path(st.session_state["videos"]).name)
285
 
286
+ # controls
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
287
# Main controls: either the "Generate the story" button, or (while a run is
# flagged busy) a progress note with a Cancel button.
#
# generate_now is bound up front so it always exists, including on runs where
# the busy branch is taken and the Generate button is never created. Code that
# checks `'generate_now' in locals()` keeps working, since the name is simply
# always present and falsy unless the button was clicked.
generate_now = False
col1, col2 = st.columns([1, 3])
with col1:
    if st.session_state.get("busy"):
        st.write("Generation in progress...")
        if st.button("Cancel"):
            # Clear the busy flag and record why, so the "Last Error" panel
            # can show that the run was stopped by the user.
            st.session_state["busy"] = False
            st.session_state["last_error"] = "Generation cancelled by user."
    else:
        generate_now = st.button("Generate the story", type="primary")
with col2:
    # Right-hand column is intentionally left empty.
    pass
298
 
299
+ # determine runtime API key (one-time entry not stored)
300
def get_runtime_api_key():
    """Return the API key to use for this run, or None when none is available.

    The value of API_KEY_INPUT takes precedence; if it is empty or blank, the
    GOOGLE_API_KEY environment variable is used instead. Both are
    whitespace-stripped, and a blank result counts as "not set".
    """
    typed_key = (API_KEY_INPUT or "").strip()
    if not typed_key:
        env_key = os.getenv("GOOGLE_API_KEY", "").strip()
        return env_key if env_key else None
    return typed_key
305
+
306
+ # generation flow
307
  if (st.session_state.get("busy") is False) and ('generate_now' in locals() and generate_now):
308
  if not st.session_state.get("videos"):
309
  st.error("No video loaded. Use 'Load Video' in the sidebar.")
310
  else:
311
+ runtime_key = get_runtime_api_key()
312
+ if not runtime_key:
313
+ st.error("Google API key not set. Provide in Settings or set GOOGLE_API_KEY in environment.")
314
  else:
315
  try:
316
  st.session_state["busy"] = True
317
  processed = st.session_state.get("processed_file")
 
318
  current_path = st.session_state.get("videos")
 
319
  try:
320
+ current_hash = file_sha256(current_path) if current_path and Path(current_path).exists() else None
321
  except Exception:
322
  current_hash = None
323
 
 
327
 
328
  if reupload_needed:
329
  if not HAS_GENAI:
330
+ raise RuntimeError("google.generativeai SDK not available")
331
  local_path = current_path
 
332
  fast_mode = st.session_state.get("fast_mode", False)
333
  upload_path = local_path
334
  try:
 
336
  except Exception:
337
  file_size_mb = 0
338
 
339
+ use_compression = st.session_state.get("use_compression", True)
340
+ if use_compression and not fast_mode and file_size_mb > 50:
341
  compressed_path = str(Path(local_path).with_name(Path(local_path).stem + "_compressed.mp4"))
342
  try:
 
343
  preset = "veryfast" if fast_mode else "fast"
344
  upload_path = compress_video(local_path, compressed_path, crf=28, preset=preset)
345
  except Exception:
346
  upload_path = local_path
347
 
348
+ genai.configure(api_key=runtime_key)
349
  with st.spinner("Uploading video..."):
350
  uploaded = upload_video_sdk(upload_path)
351
  processed = wait_for_processed(uploaded, timeout=180)
 
354
  st.session_state["last_loaded_path"] = current_path
355
  st.session_state["file_hash"] = current_hash
356
 
357
+ # privacy: delete local copy after successful upload (if different path)
358
+ try:
359
+ if Path(upload_path).exists() and Path(upload_path) != Path(current_path):
360
+ Path(upload_path).unlink(missing_ok=True)
361
+ # optionally remove original local file to avoid persistence
362
+ Path(current_path).unlink(missing_ok=True)
363
+ st.session_state["videos"] = ""
364
+ except Exception:
365
+ pass
366
 
367
+ prompt_text = (analysis_prompt or default_prompt).strip()
 
368
  if st.session_state.get("fast_mode"):
369
+ model_used = model_arg or "gemini-2.0-flash-lite"
370
  max_tokens = min(st.session_state.get("max_output_tokens", 512), 1024)
371
  else:
372
  model_used = model_arg
373
  max_tokens = st.session_state.get("max_output_tokens", 1024)
374
 
 
375
  system_msg = {"role": "system", "content": "You are a helpful assistant that summarizes videos concisely in vivid detail."}
376
  user_msg = {"role": "user", "content": prompt_text}
377
 
378
def call_responses_once(model_used, system_msg, user_msg, fname, max_tokens):
    """Issue a single generation request that references an uploaded file.

    Args:
        model_used: Model identifier passed through as ``model``.
        system_msg: ``{"role": ..., "content": ...}`` dict for the system turn.
        user_msg: ``{"role": ..., "content": ...}`` dict for the user turn.
        fname: Name/id of the uploaded file to attach.
        max_tokens: Passed through as ``max_output_tokens``.

    Returns:
        Whatever ``genai.responses.generate`` returns.

    The first attempt uses the ``messages=``/``files=`` keyword form. If that
    raises TypeError, the call is repeated with the ``input=`` form, which
    carries only the user text and the file reference (no system message).

    NOTE(review): confirm the installed SDK actually exposes
    ``genai.responses.generate``. If the attribute is missing, the resulting
    AttributeError is not caught by the TypeError fallback below.
    """
    genai.configure(api_key=runtime_key)
    try:
        return genai.responses.generate(
            model=model_used,
            messages=[system_msg, user_msg],
            files=[{"name": fname}],
            max_output_tokens=max_tokens,
        )
    except TypeError:
        # Alternate call shape: text and file reference bundled in `input`.
        bundled_input = [{"text": user_msg["content"], "files": [{"name": fname}]}]
        return genai.responses.generate(
            model=model_used,
            input=bundled_input,
            max_output_tokens=max_tokens,
        )
394
 
395
+ fname = file_name_or_id(processed) or file_name_or_id(st.session_state.get("uploaded_file"))
396
+ if not fname:
397
+ try:
398
+ uri = getattr(processed, "uri", None) or (processed.get("uri") if isinstance(processed, dict) else None)
399
+ if uri:
400
+ tail = str(uri).rstrip("/").split("/")[-1]
401
+ fname = tail if tail.startswith("files/") else f"files/{tail}"
402
+ except Exception:
403
+ pass
404
+ if not fname:
405
+ raise RuntimeError("Uploaded file missing name/id/uri; cannot reference for Responses API.")
406
+
407
+ response = call_responses_once(model_used, system_msg, user_msg, fname, max_tokens)
408
+
409
def extract_text_from_response(response):
    """Collect the text fragments of a generation response into one string.

    The response may be an attribute-style object or a plain dict. Text is
    gathered from ``response.output[*].content[*].text`` for parts whose
    ``type`` is ``"output_text"``, ``"text"`` or absent. If nothing is found
    there, the top-level ``response.text`` is used as a fallback.

    Args:
        response: Response object or dict; ``None`` is tolerated.

    Returns:
        The distinct text fragments, in first-seen order, joined by blank
        lines. An empty string when no text is present.

    Compared with the previous version:
      * the redundant second lookup of ``output`` is gone;
      * non-string ``text`` values are skipped instead of breaking ``join``;
      * a ``content`` that is itself a string is kept rather than dropped.
    """

    def _field(obj, name):
        # Attribute access first, then dict lookup; None when neither yields
        # a truthy value.
        return getattr(obj, name, None) or (obj.get(name) if isinstance(obj, dict) else None)

    pieces = []
    for item in _field(response, "output") or []:
        contents = _field(item, "content") or []
        if isinstance(contents, str):
            # Text delivered directly rather than as a list of typed parts.
            pieces.append(contents)
            continue
        for part in contents:
            if _field(part, "type") not in ("output_text", "text", None):
                continue  # non-text part
            text = _field(part, "text")
            if isinstance(text, str) and text:
                pieces.append(text)

    if not pieces:
        top_text = _field(response, "text")
        if isinstance(top_text, str) and top_text:
            pieces.append(top_text)

    # dict.fromkeys drops repeated fragments while preserving first-seen order.
    return "\n\n".join(dict.fromkeys(pieces))
433
+
434
+ out = extract_text_from_response(response)
435
+
436
+ meta = getattr(response, "metrics", None) or (response.get("metrics") if isinstance(response, dict) else None) or {}
437
+ output_tokens = 0
438
+ try:
439
+ if isinstance(meta, dict):
440
+ output_tokens = int(meta.get("output_tokens", 0) or 0)
441
+ else:
442
+ output_tokens = int(getattr(meta, "output_tokens", 0) or 0)
443
+ except Exception:
444
  output_tokens = 0
445
+
446
+ if (not out or output_tokens == 0) and model_used:
447
+ retry_prompt = "Summarize the video content briefly and vividly (2-4 paragraphs)."
448
  try:
449
+ response2 = call_responses_once(model_used, system_msg, {"role": "user", "content": retry_prompt}, fname, min(max_tokens * 2, 4096))
450
+ out2 = extract_text_from_response(response2)
451
+ if out2 and len(out2) > len(out or ""):
452
+ out = out2
453
  else:
454
+ response3 = call_responses_once(model_used, system_msg, {"role": "user", "content": "List the main points of the video as 6-10 bullets."}, fname, min(1024, max_tokens * 2))
455
+ out3 = extract_text_from_response(response3)
456
+ if out3:
457
+ out = out3
458
  except Exception:
459
+ pass
460
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
461
  if out:
462
+ out = remove_prompt_echo(prompt_text, out).strip()
463
+
464
+ st.session_state["analysis_out"] = out or ""
 
 
 
 
 
 
 
 
 
 
465
  st.session_state["last_error"] = ""
466
+
467
  st.subheader("Analysis Result")
468
  st.markdown(out or "_(no text returned)_")
469
 
470
+ # compact debug (user-triggered)
471
+ with st.expander("Debug (compact)", expanded=False):
472
+ try:
473
+ info = {
474
+ "model": model_used,
475
+ "output_tokens": output_tokens,
476
+ "upload_succeeded": bool(st.session_state.get("uploaded_file")),
477
+ "processed_active": getattr(st.session_state.get("processed_file"), "state", None) if st.session_state.get("processed_file") else None,
478
+ }
479
+ st.write(info)
480
+ except Exception:
481
+ st.write("Debug info unavailable")
 
482
 
483
  except Exception as e:
484
+ st.session_state["last_error"] = "Generation error"
485
+ st.error("An error occurred while generating the story.")
486
  finally:
487
  st.session_state["busy"] = False
488
 
489
# persistent UI: show cached analysis without paths/ids
# Re-render the stored analysis text on every run so it stays visible.
# NOTE(review): the generation flow also renders "Analysis Result" right after
# storing analysis_out, so the heading may appear twice in that run — confirm.
cached_story = st.session_state.get("analysis_out")
if cached_story:
    st.subheader("Analysis Result")
    st.markdown(cached_story)

# The most recent error message, if any, is tucked into a collapsed expander.
last_error_text = st.session_state.get("last_error")
if last_error_text:
    with st.expander("Last Error", expanded=False):
        st.write(last_error_text)
497
+
498
# delete uploaded files (local + cloud if possible)
# Sidebar action: remove everything under DATA_DIR, reset the per-video
# session state, and make a best-effort attempt to delete the uploaded file
# from the cloud.
with st.sidebar.expander("Manage uploads", expanded=False):
    if st.button("Delete uploaded files (local + cloud)"):
        # Resolve the cloud file handle BEFORE session state is cleared.
        # Reading "uploaded_file" after it has been reset to None always
        # yields nothing, so the remote file would never be deleted.
        # The processed handle is used as a fallback, as in the generation flow.
        try:
            cloud_name = (
                file_name_or_id(st.session_state.get("uploaded_file"))
                or file_name_or_id(st.session_state.get("processed_file"))
            )
        except Exception:
            cloud_name = None

        # delete local files
        for f in glob(str(DATA_DIR / "*")):
            try:
                Path(f).unlink(missing_ok=True)
            except OSError:
                # Best-effort: skip entries that cannot be removed
                # (e.g. directories or files still in use).
                pass

        st.session_state["videos"] = ""
        st.session_state["uploaded_file"] = None
        st.session_state["processed_file"] = None
        st.session_state["last_loaded_path"] = ""
        st.session_state["analysis_out"] = ""
        st.session_state["file_hash"] = None

        # attempt to delete cloud file if SDK supports it
        try:
            if cloud_name and delete_file and HAS_GENAI:
                genai.configure(api_key=get_runtime_api_key() or os.getenv("GOOGLE_API_KEY", ""))
                delete_file(cloud_name)
        except Exception:
            # Cloud deletion stays best-effort; the message below only
            # promises an attempt.
            pass
        st.success("Local files removed. Cloud deletion attempted where supported.")