CB committed on
Commit
eb3dff7
·
verified ·
1 Parent(s): 42f08aa

Update streamlit_app.py

Browse files
Files changed (1) hide show
  1. streamlit_app.py +136 -401
streamlit_app.py CHANGED
@@ -8,42 +8,36 @@ from tempfile import NamedTemporaryFile
8
 
9
  import ffmpeg
10
  import yt_dlp
11
- import requests
12
  import streamlit as st
13
  from dotenv import load_dotenv
14
 
15
  load_dotenv()
16
 
17
- # Optional Google Generative AI SDK
18
- HAS_GENAI = False
19
- genai = None
20
- upload_file = None
21
- get_file = None
22
- delete_file = None
23
  try:
24
- import google.generativeai as genai_mod # type: ignore
25
- genai = genai_mod
26
- upload_file = getattr(genai_mod, "upload_file", None)
27
- get_file = getattr(genai_mod, "get_file", None)
28
- delete_file = getattr(genai_mod, "delete_file", None)
29
- HAS_GENAI = True
30
- except Exception:
31
- HAS_GENAI = False
32
-
33
- st.set_page_config(page_title="Generate the story of videos", layout="wide")
34
- DATA_DIR = Path("./data")
 
 
 
 
35
  DATA_DIR.mkdir(exist_ok=True)
36
 
37
  # session defaults
38
  for k, v in {
39
- "videos": "",
40
- "loop_video": False,
41
  "uploaded_file": None,
42
  "processed_file": None,
43
  "busy": False,
44
- "last_loaded_path": "",
45
- "analysis_out": "",
46
- "last_error": "",
47
  "file_hash": None,
48
  "fast_mode": False,
49
  "use_compression": True,
@@ -51,11 +45,8 @@ for k, v in {
51
  st.session_state.setdefault(k, v)
52
 
53
 
54
- # Utilities
55
  def sanitize_filename(path_str: str):
56
- return Path(path_str).name.lower().translate(
57
- str.maketrans("", "", "!?\"'`~@#$%^&*()[]{}<>:,;\\/|+=*")
58
- ).replace(" ", "_")
59
 
60
 
61
  def file_sha256(path: str, block_size: int = 65536) -> str:
@@ -66,13 +57,13 @@ def file_sha256(path: str, block_size: int = 65536) -> str:
66
  return h.hexdigest()
67
 
68
 
69
- def safe_ffmpeg_run(stream_cmd):
70
  try:
71
- stream_cmd.run(overwrite_output=True, capture_stdout=True, capture_stderr=True)
72
  return True, ""
73
  except ffmpeg.Error as e:
74
  try:
75
- return False, e.stderr.decode("utf-8", errors="ignore")
76
  except Exception:
77
  return False, str(e)
78
 
@@ -85,15 +76,11 @@ def convert_video_to_mp4(video_path: str) -> str:
85
  tmp.close()
86
  ok, err = safe_ffmpeg_run(ffmpeg.input(video_path).output(str(tmp.name)))
87
  if not ok:
88
- try:
89
- os.remove(tmp.name)
90
- except Exception:
91
- pass
92
  raise RuntimeError(f"ffmpeg conversion failed: {err}")
93
  os.replace(tmp.name, str(target))
94
  if Path(video_path).suffix.lower() != ".mp4":
95
  try:
96
- os.remove(video_path)
97
  except Exception:
98
  pass
99
  return str(target)
@@ -104,10 +91,6 @@ def compress_video(input_path: str, target_path: str, crf: int = 28, preset: str
104
  tmp.close()
105
  ok, err = safe_ffmpeg_run(ffmpeg.input(input_path).output(str(tmp.name), vcodec="libx264", crf=crf, preset=preset))
106
  if not ok:
107
- try:
108
- os.remove(tmp.name)
109
- except Exception:
110
- pass
111
  return input_path
112
  os.replace(tmp.name, target_path)
113
  return target_path
@@ -124,9 +107,9 @@ def download_video_ytdlp(url: str, save_dir: str, video_password: str = None) ->
124
  info = ydl.extract_info(url, download=True)
125
  video_id = info.get("id") if isinstance(info, dict) else None
126
  if video_id:
127
- matches = glob(os.path.join(save_dir, f"{video_id}.*"))
128
  else:
129
- matches = sorted(glob(os.path.join(save_dir, "*")), key=os.path.getmtime, reverse=True)[:1]
130
  if not matches:
131
  raise FileNotFoundError("Downloaded video not found")
132
  return convert_video_to_mp4(matches[0])
@@ -137,30 +120,18 @@ def file_name_or_id(file_obj):
137
  return None
138
  if isinstance(file_obj, dict):
139
  for key in ("name", "id", "fileId", "file_id", "uri", "url"):
140
- val = file_obj.get(key)
141
- if val:
142
- return str(val)
143
- return None
144
  for attr in ("name", "id", "fileId", "file_id", "uri", "url"):
145
- val = getattr(file_obj, attr, None)
146
- if val:
147
- return str(val)
148
- s = str(file_obj)
149
- return s if s else None
150
-
151
-
152
- def upload_video_sdk(filepath: str):
153
- key = get_runtime_api_key()
154
- if not key:
155
- raise RuntimeError("No API key")
156
- if not HAS_GENAI or upload_file is None:
157
- raise RuntimeError("google.generativeai SDK upload not available")
158
- genai.configure(api_key=key)
159
- return upload_file(filepath)
160
 
161
 
162
  def wait_for_processed(file_obj, timeout=600):
163
- if not HAS_GENAI or get_file is None:
164
  return file_obj
165
  start = time.time()
166
  name = file_name_or_id(file_obj)
@@ -181,403 +152,167 @@ def wait_for_processed(file_obj, timeout=600):
181
  backoff = min(backoff * 2, 8.0)
182
 
183
 
184
- def remove_prompt_echo(prompt: str, text: str):
185
- if not prompt or not text:
186
- return text
187
- p = " ".join(prompt.strip().lower().split())
188
- t = text.strip()
189
- from difflib import SequenceMatcher
190
- first = " ".join(t[:600].lower().split())
191
- if SequenceMatcher(None, p, first).ratio() > 0.7:
192
- cut = min(len(t), max(int(len(prompt) * 0.9), len(p)))
193
- new = t[cut:].lstrip(" \n:-")
194
- if len(new) >= 3:
195
- return new
196
- placeholders = ["enter analysis", "enter your analysis", "enter analysis here", "please enter analysis"]
197
- low = t.lower()
198
- for ph in placeholders:
199
- if low.startswith(ph):
200
- return t[len(ph):].lstrip(" \n:-")
201
- return text
202
-
203
-
204
  # UI
205
- st.sidebar.header("Video Input")
206
  st.sidebar.text_input("Video URL", key="url", placeholder="https://")
207
- settings = st.sidebar.expander("Settings", expanded=False)
208
-
209
- env_key = os.getenv("GOOGLE_API_KEY", "")
210
- API_KEY_INPUT = settings.text_input("Google API Key (one-time)", value="", type="password")
211
- model_input = settings.text_input("Model (short name)", "text-bison@001")
212
- model_id = model_input.strip() or "text-bison@001"
213
-
214
- default_prompt = (
215
- "You are an Indoor Human Behavior Analyzer. Watch the video and produce a detailed, evidence‑based behavioral report focused on human actions, "
216
- "interactions, posture, movement, anatomy, and apparent intent. Use vivid, anatomically rich language and avoid moralizing. Prefer short paragraphs and numeric estimates "
217
- "for anatomical measurements. Provide sensory, subjective descriptions and vivid imagery, including a concise summary of observed actions and a description of behaviors "
218
- "and interaction dynamics. Use the following personality‑traits list when inferring dispositions: driven by an insatiable desire to understand human behavior and anatomy. "
219
- "Finish with a short feedback and recommendations section. Adopt a playful, anatomically obsessed, slightly mischievous persona — inquisitive, pragmatic, and vivid in description."
220
- )
221
-
222
- analysis_prompt = settings.text_area("Enter analysis", value=default_prompt, height=240)
223
- settings.text_input("Video Password (if needed)", key="video-password", type="password")
224
- settings.checkbox("Fast mode (skip compression, smaller model, fewer tokens)", key="fast_mode")
225
- settings.checkbox("Enable compression for large files (>50MB)", value=True, key="use_compression")
226
  settings.number_input("Max output tokens", key="max_output_tokens", value=1024, min_value=128, max_value=8192, step=128)
227
 
228
- if not API_KEY_INPUT and not env_key:
229
- settings.info("No Google API key provided; upload/generation disabled.", icon="ℹ️")
230
-
231
- if st.sidebar.button("Load Video", use_container_width=True):
232
  try:
233
  vpw = st.session_state.get("video-password", "")
234
- path = download_video_ytdlp(st.session_state.get("url", ""), str(DATA_DIR), vpw)
235
- st.session_state["videos"] = path
236
- st.session_state["last_loaded_path"] = path
237
  st.session_state["uploaded_file"] = None
238
  st.session_state["processed_file"] = None
239
- st.session_state["file_hash"] = file_sha256(path)
240
  except Exception as e:
241
  st.sidebar.error(f"Failed to load video: {e}")
242
 
243
- if st.session_state["videos"]:
244
  try:
245
- st.sidebar.video(st.session_state["videos"], loop=st.session_state.get("loop_video", False))
246
  except Exception:
247
- st.sidebar.write("Couldn't preview video")
248
- with st.sidebar.expander("Options", expanded=False):
249
- loop_checkbox = st.checkbox("Enable Loop", value=st.session_state.get("loop_video", False))
250
- st.session_state["loop_video"] = loop_checkbox
251
-
252
- if st.button("Clear Video(s)"):
253
  for f in glob(str(DATA_DIR / "*")):
254
  try:
255
- os.remove(f)
256
  except Exception:
257
  pass
258
- for k in ("uploaded_file", "processed_file"):
259
- st.session_state.pop(k, None)
260
- st.session_state["videos"] = ""
261
- st.session_state["last_loaded_path"] = ""
262
- st.session_state["analysis_out"] = ""
263
- st.session_state["last_error"] = ""
264
  st.session_state["file_hash"] = None
 
 
 
 
 
265
 
266
- try:
267
- with open(st.session_state["videos"], "rb") as vf:
268
- st.download_button("Download Video", data=vf, file_name=sanitize_filename(st.session_state["videos"]), mime="video/mp4", use_container_width=True)
269
- except Exception:
270
- pass
271
- st.sidebar.write("Title:", Path(st.session_state["videos"]).name)
272
-
273
- col1, _col2 = st.columns([1, 3])
274
  with col1:
275
- if st.session_state.get("busy"):
276
- st.write("Generation in progress...")
277
  if st.button("Cancel"):
278
  st.session_state["busy"] = False
279
- st.session_state["last_error"] = "Generation cancelled by user."
280
  else:
281
- generate_now = st.button("Generate the story", type="primary")
282
-
283
-
284
- # Runtime helpers for Responses API
285
- def get_runtime_api_key():
286
- key = API_KEY_INPUT.strip() if API_KEY_INPUT else ""
287
- if key:
288
- return key
289
- return os.getenv("GOOGLE_API_KEY", "").strip() or None
290
-
291
-
292
- def _messages_to_prompt(messages):
293
- if not messages:
294
- return ""
295
- parts = []
296
- for m in messages:
297
- role = (m.get("role") if isinstance(m, dict) else getattr(m, "role", None)) or "user"
298
- content = (m.get("content") if isinstance(m, dict) else getattr(m, "content", None)) or ""
299
- parts.append(f"{role.upper()}:\n{content.strip()}\n")
300
- return "\n".join(parts)
301
-
302
-
303
- def _http_generate_responses(api_key: str, model: str, prompt: str, max_tokens: int):
304
- # Use Responses v1 endpoint (works with modern Google GenAI HTTP API)
305
- url = "https://generativelanguage.googleapis.com/v1/responses"
306
- headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
307
- payload = {
308
- "model": model or "text-bison@001",
309
- "input": prompt,
310
- "maxOutputTokens": int(max_tokens or 512),
311
- }
312
- r = requests.post(url, json=payload, headers=headers, timeout=30)
313
- if r.status_code != 200:
314
- raise RuntimeError(f"HTTP {r.status_code}: {r.text}")
315
- return r.json()
316
-
317
-
318
- def responses_generate(model, messages, files, max_output_tokens, api_key):
319
- if not api_key:
320
- raise RuntimeError("No API key for responses_generate")
321
- # Try SDK first (if available and has responses.generate)
322
- if HAS_GENAI and genai is not None:
323
- try:
324
- genai.configure(api_key=api_key)
325
- responses_obj = getattr(genai, "responses", None)
326
- if responses_obj is not None and hasattr(responses_obj, "generate"):
327
- sdk_kwargs = {"model": model, "messages": messages, "max_output_tokens": int(max_output_tokens or 512)}
328
- if files:
329
- sdk_kwargs["files"] = files
330
- return responses_obj.generate(**sdk_kwargs)
331
- except Exception:
332
- pass
333
- # Fallback to HTTP Responses v1
334
- prompt = _messages_to_prompt(messages)
335
- return _http_generate_responses(api_key, model, prompt, max_output_tokens)
336
 
 
 
337
 
338
- def call_responses_once(model_used, system_msg, user_msg, fname, max_tokens):
339
- messages = [system_msg, user_msg]
340
- files = [{"name": fname}] if fname else None
341
- for attempt in range(2):
342
- try:
343
- return responses_generate(model_used, messages, files, max_tokens, api_key=get_runtime_api_key())
344
- except Exception:
345
- if attempt == 0:
346
- time.sleep(1.0)
347
- continue
348
- raise
349
 
 
 
 
 
 
 
 
350
 
351
- def extract_text_from_response(response):
352
- if response is None:
353
- return None
354
- if isinstance(response, dict):
355
- out = []
356
- for item in response.get("output", []) or []:
357
- if isinstance(item, dict):
358
- for c in item.get("content", []) or []:
359
- if isinstance(c, dict) and "text" in c:
360
- out.append(c["text"])
361
- if "text" in item and isinstance(item["text"], str):
362
- out.append(item["text"])
363
- if "content" in item and isinstance(item["content"], str):
364
- out.append(item["content"])
365
- if out:
366
- return "\n\n".join(out)
367
- if "candidates" in response and response["candidates"]:
368
- cand = response["candidates"][0]
369
- if isinstance(cand, dict):
370
- return cand.get("content") or cand.get("text")
371
- if "outputText" in response:
372
- return response.get("outputText")
373
- if "text" in response:
374
- return response.get("text")
375
  return None
376
- # SDK-style objects
377
- try:
378
- outputs = getattr(response, "output", None) or getattr(response, "candidates", None)
379
- if outputs:
380
- parts = []
381
- for item in outputs:
382
- if hasattr(item, "content"):
383
- c = getattr(item, "content")
384
- if isinstance(c, list):
385
- for e in c:
386
- if isinstance(e, dict) and "text" in e:
387
- parts.append(e["text"])
388
- elif isinstance(e, str):
389
- parts.append(e)
390
- elif isinstance(c, str):
391
- parts.append(c)
392
- txt = getattr(item, "text", None)
393
- if isinstance(txt, str):
394
- parts.append(txt)
395
- if parts:
396
- return "\n\n".join(parts)
397
- txt = getattr(response, "text", None) or getattr(response, "output_text", None)
398
- if txt:
399
- return txt
400
- except Exception:
401
- pass
402
- return None
403
-
404
-
405
- # Main generation flow
406
- if (st.session_state.get("busy") is False) and ('generate_now' in locals() and generate_now):
407
- if not st.session_state.get("videos"):
408
- st.error("No video loaded. Use 'Load Video' in the sidebar.")
409
  else:
410
- runtime_key = get_runtime_api_key()
411
- if not runtime_key:
412
- st.error("Google API key not set. Provide in Settings or set GOOGLE_API_KEY in environment.")
413
  else:
414
  try:
415
  st.session_state["busy"] = True
416
- processed = st.session_state.get("processed_file")
417
- current_path = st.session_state.get("videos")
418
- try:
419
- current_hash = file_sha256(current_path) if current_path and Path(current_path).exists() else None
420
- except Exception:
421
- current_hash = None
422
-
423
- reupload_needed = True
424
- if processed and st.session_state.get("last_loaded_path") == current_path and st.session_state.get("file_hash") == current_hash:
425
- reupload_needed = False
426
-
427
- upload_path = current_path
428
- uploaded = st.session_state.get("uploaded_file")
429
- compressed_path = None
430
-
431
- if reupload_needed:
432
- local_path = current_path
433
- fast_mode = bool(st.session_state.get("fast_mode", False))
434
- try:
435
- file_size_mb = os.path.getsize(local_path) / (1024 * 1024)
436
- except Exception:
437
- file_size_mb = 0
438
-
439
- use_compression = bool(st.session_state.get("use_compression", True))
440
- if use_compression and not fast_mode and file_size_mb > 50:
441
- compressed_path = str(Path(local_path).with_name(Path(local_path).stem + "_compressed.mp4"))
442
- try:
443
- preset = "veryfast" if fast_mode else "fast"
444
- upload_path = compress_video(local_path, compressed_path, crf=28, preset=preset)
445
- if Path(upload_path) == Path(local_path):
446
- compressed_path = None
447
- except Exception:
448
- upload_path = local_path
449
- compressed_path = None
450
-
451
- if HAS_GENAI and upload_file is not None:
452
- genai.configure(api_key=runtime_key)
453
- with st.spinner("Uploading video..."):
454
- uploaded = upload_video_sdk(upload_path)
455
- processed = wait_for_processed(uploaded, timeout=600)
456
- st.session_state["uploaded_file"] = uploaded
457
- st.session_state["processed_file"] = processed
458
- st.session_state["last_loaded_path"] = current_path
459
- st.session_state["file_hash"] = current_hash
460
- else:
461
- uploaded = None
462
- processed = None
463
- st.session_state["uploaded_file"] = None
464
- st.session_state["processed_file"] = None
465
- else:
466
- uploaded = st.session_state.get("uploaded_file")
467
- processed = st.session_state.get("processed_file")
468
-
469
- prompt_text = (analysis_prompt or default_prompt or "").strip()
470
- if st.session_state.get("fast_mode"):
471
- model_used = model_id or "text-bison@001"
472
- max_tokens = min(int(st.session_state.get("max_output_tokens", 512)), 1024)
473
  else:
474
- model_used = model_id or "text-bison@001"
475
- max_tokens = int(st.session_state.get("max_output_tokens", 1024))
476
-
477
- system_msg = {"role": "system", "content": "You are a helpful assistant that summarizes videos concisely in vivid detail."}
478
- user_msg = {"role": "user", "content": prompt_text}
479
 
480
  fname = file_name_or_id(processed) or file_name_or_id(uploaded)
481
- response = call_responses_once(model_used, system_msg, user_msg, fname, max_tokens)
482
-
483
- out = extract_text_from_response(response)
484
-
485
- meta = getattr(response, "metrics", None) or (response.get("metrics") if isinstance(response, dict) else None) or {}
486
- output_tokens = 0
487
- try:
488
- if isinstance(meta, dict):
489
- output_tokens = int(meta.get("output_tokens", 0) or 0)
490
- else:
491
- output_tokens = int(getattr(meta, "output_tokens", 0) or 0)
492
- except Exception:
493
- output_tokens = 0
494
-
495
- if (not out or output_tokens == 0) and model_used:
496
- retry_prompt = "Summarize the video content briefly and vividly (2-4 paragraphs)."
497
- try:
498
- response2 = call_responses_once(model_used, system_msg, {"role": "user", "content": retry_prompt}, fname, min(max_tokens * 2, 4096))
499
- out2 = extract_text_from_response(response2)
500
- if out2 and len(out2) > len(out or ""):
501
- out = out2
502
- else:
503
- response3 = call_responses_once(model_used, system_msg, {"role": "user", "content": "List the main points of the video as 6-10 bullets."}, fname, min(1024, max_tokens * 2))
504
- out3 = extract_text_from_response(response3)
505
- if out3:
506
- out = out3
507
- except Exception:
508
- pass
509
-
510
  if out:
511
- out = remove_prompt_echo(prompt_text, out).strip()
512
-
513
  st.session_state["analysis_out"] = out or ""
514
  st.session_state["last_error"] = ""
515
-
516
  st.subheader("Analysis Result")
517
  st.markdown(out or "_(no text returned)_")
518
-
519
- try:
520
- if reupload_needed and compressed_path:
521
- p = Path(compressed_path)
522
- if p.exists():
523
- p.unlink(missing_ok=True)
524
- except Exception:
525
- pass
526
-
527
- with st.expander("Debug (compact)", expanded=False):
528
  try:
529
- info = {
530
- "model": model_used,
531
- "output_tokens": output_tokens,
532
- "upload_succeeded": bool(st.session_state.get("uploaded_file")),
533
- "processed_state": getattr(st.session_state.get("processed_file"), "state", None) if st.session_state.get("processed_file") else None,
534
- }
535
- st.write(info)
536
- try:
537
- if isinstance(response, dict):
538
- keys = list(response.keys())[:20]
539
- else:
540
- keys = [k for k in dir(response) if not k.startswith("_")][:20]
541
- st.write({"response_keys_or_attrs": keys})
542
- except Exception:
543
- pass
544
  except Exception:
545
- st.write("Debug info unavailable")
546
-
547
  except Exception as e:
548
  st.session_state["last_error"] = str(e)
549
  st.error(f"An error occurred while generating the story: {e}")
550
  finally:
551
  st.session_state["busy"] = False
552
 
553
- # Show outputs / errors
554
  if st.session_state.get("analysis_out"):
555
  st.subheader("Analysis Result")
556
  st.markdown(st.session_state.get("analysis_out"))
557
 
558
  if st.session_state.get("last_error"):
559
- with st.expander("Last Error", expanded=False):
560
  st.write(st.session_state.get("last_error"))
561
 
562
- with st.sidebar.expander("Manage uploads", expanded=False):
563
- if st.button("Delete uploaded files (local + cloud)"):
564
  for f in glob(str(DATA_DIR / "*")):
565
  try:
566
  Path(f).unlink(missing_ok=True)
567
  except Exception:
568
  pass
569
- # attempt cloud deletion if supported
570
- try:
571
- fname = file_name_or_id(st.session_state.get("uploaded_file"))
572
- if fname and delete_file and HAS_GENAI:
573
- genai.configure(api_key=get_runtime_api_key() or os.getenv("GOOGLE_API_KEY", ""))
574
- delete_file(fname)
575
- except Exception:
576
- pass
577
- st.session_state["videos"] = ""
578
- st.session_state["uploaded_file"] = None
579
- st.session_state["processed_file"] = None
580
- st.session_state["last_loaded_path"] = ""
581
- st.session_state["analysis_out"] = ""
582
- st.session_state["file_hash"] = None
583
- st.success("Local files removed. Cloud deletion attempted where supported.")
 
8
 
9
  import ffmpeg
10
  import yt_dlp
 
11
  import streamlit as st
12
  from dotenv import load_dotenv
13
 
14
  load_dotenv()
15
 
16
+ # Require google.generativeai SDK
 
 
 
 
 
17
  try:
18
+ import google.generativeai as genai # type: ignore
19
+ except Exception as e:
20
+ st.error("Missing required dependency: google.generativeai. Install it and restart.")
21
+ raise
22
+
23
+ # ensure SDK helpers exist
24
+ upload_file = getattr(genai, "upload_file", None)
25
+ get_file = getattr(genai, "get_file", None)
26
+ delete_file = getattr(genai, "delete_file", None)
27
+ if upload_file is None:
28
+ st.error("google.generativeai SDK installed but upload_file is not available in this version.")
29
+ raise RuntimeError("upload_file missing")
30
+
31
+ st.set_page_config(page_title="Video → Story (GenAI)", layout="wide")
32
+ DATA_DIR = Path("data")
33
  DATA_DIR.mkdir(exist_ok=True)
34
 
35
  # session defaults
36
  for k, v in {
37
+ "video_path": "",
 
38
  "uploaded_file": None,
39
  "processed_file": None,
40
  "busy": False,
 
 
 
41
  "file_hash": None,
42
  "fast_mode": False,
43
  "use_compression": True,
 
45
  st.session_state.setdefault(k, v)
46
 
47
 
 
48
  def sanitize_filename(path_str: str):
49
+ return Path(path_str).name.lower().replace(" ", "_")
 
 
50
 
51
 
52
  def file_sha256(path: str, block_size: int = 65536) -> str:
 
57
  return h.hexdigest()
58
 
59
 
60
def safe_ffmpeg_run(cmd):
    """Run an ffmpeg-python command and report the outcome instead of raising.

    Args:
        cmd: Object exposing ``run(overwrite_output=..., capture_stdout=...,
            capture_stderr=...)``; callers in this file pass the result of
            ``ffmpeg.input(...).output(...)``.

    Returns:
        ``(True, "")`` when the command completes, otherwise
        ``(False, message)`` where ``message`` is the captured stderr text,
        or ``str(error)`` when no stderr text is available.
    """
    try:
        cmd.run(overwrite_output=True, capture_stdout=True, capture_stderr=True)
    except ffmpeg.Error as e:
        stderr = getattr(e, "stderr", None)
        if isinstance(stderr, (bytes, bytearray)):
            message = bytes(stderr).decode(errors="ignore")
        elif isinstance(stderr, str):
            message = stderr
        else:
            message = ""
        # An empty stderr would give the caller a blank error; fall back to
        # the exception text so the failure is always described.
        return False, message or str(e)
    return True, ""
69
 
 
76
  tmp.close()
77
  ok, err = safe_ffmpeg_run(ffmpeg.input(video_path).output(str(tmp.name)))
78
  if not ok:
 
 
 
 
79
  raise RuntimeError(f"ffmpeg conversion failed: {err}")
80
  os.replace(tmp.name, str(target))
81
  if Path(video_path).suffix.lower() != ".mp4":
82
  try:
83
+ Path(video_path).unlink(missing_ok=True)
84
  except Exception:
85
  pass
86
  return str(target)
 
91
  tmp.close()
92
  ok, err = safe_ffmpeg_run(ffmpeg.input(input_path).output(str(tmp.name), vcodec="libx264", crf=crf, preset=preset))
93
  if not ok:
 
 
 
 
94
  return input_path
95
  os.replace(tmp.name, target_path)
96
  return target_path
 
107
  info = ydl.extract_info(url, download=True)
108
  video_id = info.get("id") if isinstance(info, dict) else None
109
  if video_id:
110
+ matches = glob(str(Path(save_dir) / f"{video_id}.*"))
111
  else:
112
+ matches = sorted(glob(str(Path(save_dir) / "*")), key=os.path.getmtime, reverse=True)[:1]
113
  if not matches:
114
  raise FileNotFoundError("Downloaded video not found")
115
  return convert_video_to_mp4(matches[0])
 
120
  return None
121
  if isinstance(file_obj, dict):
122
  for key in ("name", "id", "fileId", "file_id", "uri", "url"):
123
+ v = file_obj.get(key)
124
+ if v:
125
+ return str(v)
 
126
  for attr in ("name", "id", "fileId", "file_id", "uri", "url"):
127
+ v = getattr(file_obj, attr, None)
128
+ if v:
129
+ return str(v)
130
+ return str(file_obj)
 
 
 
 
 
 
 
 
 
 
 
131
 
132
 
133
  def wait_for_processed(file_obj, timeout=600):
134
+ if get_file is None:
135
  return file_obj
136
  start = time.time()
137
  name = file_name_or_id(file_obj)
 
152
  backoff = min(backoff * 2, 8.0)
153
 
154
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
155
  # UI
156
+ st.sidebar.header("Input")
157
  st.sidebar.text_input("Video URL", key="url", placeholder="https://")
158
+ settings = st.sidebar.expander("Settings", expanded=True)
159
+ API_KEY_INPUT = settings.text_input("Google API Key (one-time)", value=os.getenv("GOOGLE_API_KEY", ""), type="password")
160
+ MODEL = settings.text_input("Model", value="text-bison@001")
161
+ settings.checkbox("Fast mode (skip compress)", key="fast_mode")
162
+ settings.checkbox("Compress >50MB", value=True, key="use_compression")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
163
  settings.number_input("Max output tokens", key="max_output_tokens", value=1024, min_value=128, max_value=8192, step=128)
164
 
165
+ if st.sidebar.button("Load Video"):
 
 
 
166
  try:
167
  vpw = st.session_state.get("video-password", "")
168
+ p = download_video_ytdlp(st.session_state.get("url", ""), str(DATA_DIR), vpw)
169
+ st.session_state["video_path"] = p
170
+ st.session_state["file_hash"] = file_sha256(p)
171
  st.session_state["uploaded_file"] = None
172
  st.session_state["processed_file"] = None
 
173
  except Exception as e:
174
  st.sidebar.error(f"Failed to load video: {e}")
175
 
176
+ if st.session_state["video_path"]:
177
  try:
178
+ st.sidebar.video(st.session_state["video_path"])
179
  except Exception:
180
+ st.sidebar.write("Can't preview")
181
+ with st.sidebar.expander("Actions"):
182
+ if st.button("Clear"):
 
 
 
183
  for f in glob(str(DATA_DIR / "*")):
184
  try:
185
+ Path(f).unlink(missing_ok=True)
186
  except Exception:
187
  pass
188
+ st.session_state["video_path"] = ""
189
+ st.session_state["uploaded_file"] = None
190
+ st.session_state["processed_file"] = None
 
 
 
191
  st.session_state["file_hash"] = None
192
+ try:
193
+ with open(st.session_state["video_path"], "rb") as vf:
194
+ st.download_button("Download Video", data=vf, file_name=sanitize_filename(st.session_state["video_path"]))
195
+ except Exception:
196
+ pass
197
 
198
+ col1, col2 = st.columns([1, 3])
 
 
 
 
 
 
 
199
  with col1:
200
+ if st.session_state["busy"]:
201
+ st.write("Working...")
202
  if st.button("Cancel"):
203
  st.session_state["busy"] = False
 
204
  else:
205
+ gen_btn = st.button("Generate the story", type="primary")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
206
 
207
+ with col2:
208
+ prompt_text = st.text_area("Analysis prompt", value="Summarize the video's main events vividly, 2-4 paragraphs.", height=200)
209
 
210
def configure_sdk(api_key: str):
    """Configure the google.generativeai SDK with ``api_key``.

    Args:
        api_key: Google API key; surrounding whitespace is ignored.

    Raises:
        ValueError: If ``api_key`` is ``None``, empty, or only whitespace.
            Failing here gives a clear message instead of an error from the
            SDK at request time.
    """
    cleaned = (api_key or "").strip()
    if not cleaned:
        raise ValueError("A non-empty Google API key is required")
    genai.configure(api_key=cleaned)
 
 
 
 
 
 
 
 
 
212
 
213
def responses_generate_via_sdk(model, prompt, file_name, max_tokens):
    """Generate text for ``prompt``, optionally grounded on an uploaded file.

    Args:
        model: Model name passed to the SDK.
        prompt: User prompt text.
        file_name: Name/ID of a previously uploaded file, or a falsy value
            when no file should be attached.
        max_tokens: Upper bound on generated tokens (coerced with ``int``).

    Returns:
        Whatever response object the SDK returns; callers extract the text
        from it separately.
    """
    token_limit = int(max_tokens)

    # Use a Responses-style entry point only if this SDK build has one.
    # Calling ``genai.responses.generate`` unconditionally raises
    # AttributeError on builds that do not expose ``responses``.
    responses_api = getattr(genai, "responses", None)
    if responses_api is not None and hasattr(responses_api, "generate"):
        messages = [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt},
        ]
        kwargs = {"model": model, "messages": messages, "max_output_tokens": token_limit}
        if file_name:
            kwargs["files"] = [{"name": file_name}]
        return responses_api.generate(**kwargs)

    # Fallback: the GenerativeModel API. The uploaded file is passed as a
    # content part next to the prompt. The generic system line is not sent
    # on this path.
    # NOTE(review): assumes file_name is a "files/..." resource name that
    # genai.get_file accepts - confirm against the upload result.
    parts = []
    if file_name:
        parts.append(genai.get_file(file_name))
    parts.append(prompt)
    generative_model = genai.GenerativeModel(model)
    return generative_model.generate_content(
        parts,
        generation_config={"max_output_tokens": token_limit},
    )
220
 
221
def extract_text(resp):
    """Extract the generated text from a response object or dict.

    For every item in ``resp.output`` (or ``resp["output"]``), collects the
    text found in ``content`` (a string, or a list of strings and parts
    carrying a ``text`` field) and then the item's own ``text``. The pieces
    are joined with blank lines. If nothing is collected, falls back to
    ``output_text`` and then ``text`` on the response itself.

    Args:
        resp: SDK response object, dict-shaped response, or ``None``.

    Returns:
        The extracted text, or ``None`` when the response has none.
    """

    def _field(obj, name):
        # Responses and their items may be SDK objects or plain dicts.
        if isinstance(obj, dict):
            return obj.get(name)
        try:
            return getattr(obj, name, None)
        except Exception:
            # getattr's default only covers AttributeError; a property that
            # raises something else (e.g. a ``text`` accessor on a response
            # with no usable result) is treated as "no value".
            return None

    if resp is None:
        return None

    pieces = []
    for item in _field(resp, "output") or []:
        content = _field(item, "content") or []
        if isinstance(content, str):
            pieces.append(content)
        elif isinstance(content, (list, tuple)):
            for part in content:
                if isinstance(part, str):
                    pieces.append(part)
                    continue
                part_text = _field(part, "text")
                # Only keep real strings so the join below cannot fail.
                if isinstance(part_text, str):
                    pieces.append(part_text)
        item_text = _field(item, "text")
        if isinstance(item_text, str):
            pieces.append(item_text)

    if pieces:
        return "\n\n".join(pieces)
    # Fallback attributes on the response itself.
    return _field(resp, "output_text") or _field(resp, "text")
244
+
245
+ if 'gen_btn' in locals() and gen_btn:
246
+ if not st.session_state["video_path"]:
247
+ st.error("No video loaded")
 
 
 
 
 
 
 
 
 
248
  else:
249
+ key = API_KEY_INPUT.strip() or None
250
+ if not key:
251
+ st.error("Set GOOGLE_API_KEY in .env or paste in Settings")
252
  else:
253
  try:
254
  st.session_state["busy"] = True
255
+ configure_sdk(key)
256
+
257
+ # decide whether to upload
258
+ path = st.session_state["video_path"]
259
+ current_hash = file_sha256(path) if Path(path).exists() else None
260
+ need_upload = True
261
+ if st.session_state["processed_file"] and st.session_state.get("file_hash") == current_hash:
262
+ need_upload = False
263
+
264
+ upload_path = path
265
+ compressed = None
266
+ if need_upload:
267
+ size_mb = Path(path).stat().st_size / (1024 * 1024)
268
+ if st.session_state.get("use_compression") and not st.session_state.get("fast_mode") and size_mb > 50:
269
+ compressed = str(Path(path).with_name(Path(path).stem + "_compressed.mp4"))
270
+ upload_path = compress_video(path, compressed, crf=28, preset="fast")
271
+ with st.spinner("Uploading video..."):
272
+ uploaded = upload_file(upload_path)
273
+ processed = wait_for_processed(uploaded, timeout=600)
274
+ st.session_state["uploaded_file"] = uploaded
275
+ st.session_state["processed_file"] = processed
276
+ st.session_state["file_hash"] = current_hash
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
277
  else:
278
+ uploaded = st.session_state["uploaded_file"]
279
+ processed = st.session_state["processed_file"]
 
 
 
280
 
281
  fname = file_name_or_id(processed) or file_name_or_id(uploaded)
282
+ resp = responses_generate_via_sdk(MODEL, prompt_text, fname, st.session_state.get("max_output_tokens", 1024))
283
+ out = extract_text(resp)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
284
  if out:
285
+ out = out.strip()
 
286
  st.session_state["analysis_out"] = out or ""
287
  st.session_state["last_error"] = ""
 
288
  st.subheader("Analysis Result")
289
  st.markdown(out or "_(no text returned)_")
290
+ # cleanup compressed
291
+ if compressed:
 
 
 
 
 
 
 
 
292
  try:
293
+ Path(compressed).unlink(missing_ok=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
294
  except Exception:
295
+ pass
 
296
  except Exception as e:
297
  st.session_state["last_error"] = str(e)
298
  st.error(f"An error occurred while generating the story: {e}")
299
  finally:
300
  st.session_state["busy"] = False
301
 
 
302
  if st.session_state.get("analysis_out"):
303
  st.subheader("Analysis Result")
304
  st.markdown(st.session_state.get("analysis_out"))
305
 
306
  if st.session_state.get("last_error"):
307
+ with st.expander("Last Error"):
308
  st.write(st.session_state.get("last_error"))
309
 
310
+ with st.sidebar.expander("Manage uploads"):
311
+ if st.button("Delete local files"):
312
  for f in glob(str(DATA_DIR / "*")):
313
  try:
314
  Path(f).unlink(missing_ok=True)
315
  except Exception:
316
  pass
317
+ st.session_state["video_path"] = ""
318
+ st.success("Local files removed")