Hug0endob committed on
Commit
1cd718a
·
verified ·
1 Parent(s): 7b0be11

Update streamlit_app.py

Browse files
Files changed (1) hide show
  1. streamlit_app.py +58 -373
streamlit_app.py CHANGED
@@ -2,7 +2,7 @@
2
  # -*- coding: utf-8 -*-
3
 
4
  """
5
- Video‑analysis Streamlit app (refactored).
6
  """
7
 
8
  # ----------------------------------------------------------------------
@@ -26,296 +26,31 @@ import snscrape.modules.twitter as sntwitter
26
  DATA_DIR = Path("./data")
27
  DATA_DIR.mkdir(exist_ok=True)
28
 
29
- def _compress_video(inp: Path, crf: int = 28, preset: str = "fast") -> Path:
30
- """Compress *inp* using libx264; return the compressed file."""
31
- out = inp.with_name(f"{inp.stem}_compressed.mp4")
32
- try:
33
- ffmpeg.input(str(inp)).output(
34
- str(out), vcodec="libx264", crf=crf, preset=preset
35
- ).overwrite_output().run(capture_stdout=True, capture_stderr=True)
36
- except ffmpeg.Error as e:
37
- raise RuntimeError(f"ffmpeg compression failed: {e.stderr.decode()}") from e
38
- return out if out.exists() else inp
39
-
40
-
41
- def _maybe_compress(path: Path, limit_mb: int) -> Tuple[Path, bool]:
42
- """Compress *path* if its size exceeds *limit_mb*."""
43
- size_mb = path.stat().st_size / (1024 * 1024)
44
- if size_mb <= limit_mb:
45
- return path, False
46
- return _compress_video(path), True
47
-
48
-
49
- def _download_direct(url: str, dst: Path) -> Path:
50
- """Download a raw video file via HTTP GET."""
51
- r = requests.get(url, stream=True, timeout=30)
52
- r.raise_for_status()
53
- out = dst / _sanitize_filename(url.split("/")[-1])
54
- with out.open("wb") as f:
55
- for chunk in r.iter_content(chunk_size=8192):
56
- if chunk:
57
- f.write(chunk)
58
- return out
59
-
60
-
61
- def _download_with_yt_dlp(url: str, dst: Path, password: str = "") -> Path:
62
- """Download via yt‑dlp, ensuring the complete file is retrieved."""
63
- tmpl = str(dst / "%(id)s.%(ext)s")
64
- # Prefer a full‑container MP4; fall back to the best available format.
65
- fmt = "best[ext=mp4]/best"
66
-
67
- opts = {
68
- "outtmpl": tmpl,
69
- "format": fmt,
70
- "quiet": True,
71
- "noprogress": True,
72
- "nocheckcertificate": True,
73
- "merge_output_format": "mp4", # force a single MP4 file
74
- "fragment_retries": 0, # avoid fragmented downloads
75
- }
76
- if password:
77
- opts["videopassword"] = password
78
-
79
- progress_bar = st.empty()
80
- status_text = st.empty()
81
-
82
- def _progress_hook(d):
83
- if d["status"] == "downloading":
84
- total = d.get("total_bytes") or d.get("total_bytes_estimate")
85
- downloaded = d.get("downloaded_bytes", 0)
86
- if total:
87
- pct = downloaded / total
88
- progress_bar.progress(pct)
89
- status_text.caption(f"Downloading… {pct:.0%}")
90
- elif d["status"] == "finished":
91
- progress_bar.progress(1.0)
92
- status_text.caption("Download complete, processing…")
93
-
94
- opts["progress_hooks"] = [_progress_hook]
95
-
96
- try:
97
- with yt_dlp.YoutubeDL(opts) as ydl:
98
- ydl.extract_info(url, download=True)
99
- except Exception as e:
100
- raise RuntimeError(f"yt‑dlp could not download the URL: {e}") from e
101
- finally:
102
- progress_bar.empty()
103
- status_text.empty()
104
-
105
- # yt‑dlp may have produced several files; pick the newest MP4
106
- mp4_files = list(dst.glob("*.mp4"))
107
- if not mp4_files:
108
- raise RuntimeError("No MP4 file was created.")
109
- newest = max(mp4_files, key=lambda p: p.stat().st_mtime)
110
-
111
- # Optional cache: if a file with the same SHA‑256 already exists, reuse it
112
- sha = _file_sha256(newest)
113
- if sha:
114
- for existing in dst.iterdir():
115
- if existing != newest and _file_sha256(existing) == sha:
116
- newest.unlink() # remove duplicate
117
- return existing
118
-
119
- return newest
120
-
121
-
122
- def download_video(url: str, dst: Path, password: str = "") -> Path:
123
- """
124
- Download a video from *url* and return an MP4 path.
125
- Strategy
126
- ---------
127
- 1. Direct video URL → HTTP GET.
128
- 2. Twitter status → scrape for embedded video URLs.
129
- 3. yt‑dlp fallback for everything else.
130
- """
131
- video_exts = (".mp4", ".mov", ".webm", ".mkv", ".avi", ".flv")
132
-
133
- if url.lower().endswith(video_exts):
134
- return _download_direct(url, dst)
135
-
136
- if "twitter.com" in url and "/status/" in url:
137
- tweet_id = url.split("/")[-1].split("?")[0]
138
- for tweet in sntwitter.TwitterTweetScraper(tweet_id).get_items():
139
- for m in getattr(tweet, "media", []):
140
- if getattr(m, "video_url", None):
141
- return download_video(m.video_url, dst)
142
- for u in getattr(tweet, "urls", []):
143
- if u.expandedUrl.lower().endswith(video_exts):
144
- return download_video(u.expandedUrl, dst)
145
- raise RuntimeError("No video found in the tweet.")
146
-
147
- # Fallback to yt‑dlp for any other URL
148
- return _download_with_yt_dlp(url, dst, password)
149
-
150
-
151
- def _encode_video_b64(path: Path) -> str:
152
- """Read *path* and return a base64‑encoded string."""
153
- return base64.b64encode(path.read_bytes()).decode()
154
-
155
-
156
- def generate_report(
157
- video_path: Path,
158
- prompt: str,
159
- model_id: str,
160
- timeout: int = 300,
161
- ) -> str:
162
- """Send video + prompt to Gemini and return the text response."""
163
- b64 = _encode_video_b64(video_path)
164
- video_part = {"inline_data": {"mime_type": "video/mp4", "data": b64}}
165
- model = genai.GenerativeModel(model_name=model_id)
166
-
167
- resp = model.generate_content(
168
- [prompt, video_part],
169
- generation_config={"max_output_tokens": 1024},
170
- request_options={"timeout": timeout},
171
- )
172
- return getattr(resp, "text", str(resp))
173
-
174
-
175
- def _strip_prompt_echo(prompt: str, text: str, threshold: float = 0.68) -> str:
176
- """Remove the prompt if the model repeats it at the start of *text*."""
177
- if not prompt or not text:
178
- return text
179
-
180
- clean_prompt = " ".join(prompt.lower().split())
181
- snippet = " ".join(text.lower().split()[:600])
182
-
183
- if SequenceMatcher(None, clean_prompt, snippet).ratio() > threshold:
184
- cut = max(len(clean_prompt), int(len(prompt) * 0.9))
185
- return text[cut:].lstrip(" \n:-")
186
- return text
187
-
188
-
189
- # ----------------------------------------------------------------------
190
- # UI helpers
191
- # ----------------------------------------------------------------------
192
- def _expand_sidebar(width: int = 380) -> None:
193
- """Inject CSS to make the sidebar wider."""
194
- st.markdown(
195
- f"""
196
- <style>
197
- .css-1d391kg {{ /* may vary with Streamlit versions */
198
- width: {width}px !important;
199
- min-width: {width}px !important;
200
- }}
201
- </style>
202
- """,
203
- unsafe_allow_html=True,
204
- )
205
-
206
-
207
- # ----------------------------------------------------------------------
208
- # Streamlit UI
209
- # ----------------------------------------------------------------------
210
- def main() -> None:
211
- st.set_page_config(page_title="Video Analysis", layout="wide")
212
- _expand_sidebar()
213
-
214
- # ---------- Sidebar ----------
215
- st.sidebar.header("Video Input")
216
- st.sidebar.text_input("Video URL", key="url", placeholder="https://")
217
-
218
- if st.sidebar.button("Load Video"):
219
- try:
220
- with st.spinner("Downloading video…"):
221
- raw_path = download_video(
222
- st.session_state["url"], DATA_DIR, st.session_state["video_password"]
223
- )
224
- mp4_path = _convert_to_mp4(Path(raw_path))
225
- st.session_state["video_path"] = str(mp4_path)
226
- st.session_state["last_error"] = ""
227
- st.toast("Video ready")
228
- st.experimental_rerun()
229
- except Exception as e:
230
- st.session_state["last_error"] = f"Download failed: {e}"
231
- st.sidebar.error(st.session_state["last_error"])
232
-
233
- # ---------- Settings ----------
234
- with st.sidebar.expander("Settings", expanded=False):
235
- model = st.selectbox(
236
- "Model", MODEL_OPTIONS, index=MODEL_OPTIONS.index(DEFAULT_MODEL)
237
- )
238
- if model == "custom":
239
- model = st.text_input("Custom model ID", value=DEFAULT_MODEL, key="custom_model")
240
- st.session_state["model_input"] = model
241
-
242
- # API key handling
243
- secret_key = os.getenv("GOOGLE_API_KEY", "")
244
- if secret_key:
245
- st.session_state["api_key"] = secret_key
246
- st.text_input("Google API Key", key="api_key", type="password")
247
-
248
- st.text_area(
249
- "Analysis prompt",
250
- value=DEFAULT_PROMPT,
251
- key="prompt",
252
- height=140,
253
- )
254
- st.text_input(
255
- "Video password (if needed)",
256
- key="video_password",
257
- type="password",
258
- )
259
- st.number_input(
260
- "Compress if > (MB)",
261
- min_value=10,
262
- max_value=2000,
263
- value=st.session_state.get("compress_mb", 200),
264
- step=10,
265
- key="compress_mb",
266
- )
267
-
268
- if st.sidebar.button("Clear Video"):
269
- for f in DATA_DIR.iterdir():
270
- try:
271
- f.unlink()
272
- except Exception:
273
- pass
274
- st
275
-
276
- ]
277
  DEFAULT_MODEL = "gemini-2.0-flash-lite"
278
-
279
  DEFAULT_PROMPT = (
280
  "Watch the video and provide a detailed behavioral report focusing on human actions, "
281
  "interactions, posture, movement, and apparent intent. Keep language professional. "
282
  "Include a list of observations for notable events."
283
  )
284
 
285
- # ----------------------------------------------------------------------
286
- # Session‑state defaults
287
- # ----------------------------------------------------------------------
288
- def _init_state() -> None:
289
- defaults = {
290
- "url": "",
291
- "video_path": "",
292
- "model_input": DEFAULT_MODEL,
293
- "prompt": DEFAULT_PROMPT,
294
- "api_key": os.getenv("GOOGLE_API_KEY", "AIzaSyBiAW2GQLid0HGe9Vs_ReKwkwsSVNegNzs"),
295
- "video_password": "",
296
- "compress_mb": 200,
297
- "busy": False,
298
- "last_error": "",
299
- "analysis_out": "",
300
- "raw_output": "",
301
- "last_error_detail": "",
302
- "show_raw_on_error": False,
303
- "show_analysis": False,
304
- }
305
- for k, v in defaults.items():
306
- st.session_state.setdefault(k, v)
307
-
308
- _init_state()
309
 
310
  # ----------------------------------------------------------------------
311
  # Helper utilities
312
  # ----------------------------------------------------------------------
313
  def _sanitize_filename(url: str) -> str:
 
314
  name = Path(url).name.lower()
315
  return name.translate(str.maketrans("", "", string.punctuation)).replace(" ", "_")
316
 
317
 
318
  def _file_sha256(path: Path) -> Optional[str]:
 
319
  try:
320
  h = hashlib.sha256()
321
  with path.open("rb") as f:
@@ -327,6 +62,7 @@ def _file_sha256(path: Path) -> Optional[str]:
327
 
328
 
329
  def _convert_to_mp4(src: Path) -> Path:
 
330
  dst = src.with_suffix(".mp4")
331
  if dst.exists():
332
  return dst
@@ -343,7 +79,7 @@ def _convert_to_mp4(src: Path) -> Path:
343
 
344
 
345
  def _compress_video(inp: Path, crf: int = 28, preset: str = "fast") -> Path:
346
- """Compress *inp* using libx264; return the compressed file."""
347
  out = inp.with_name(f"{inp.stem}_compressed.mp4")
348
  try:
349
  ffmpeg.input(str(inp)).output(
@@ -355,7 +91,7 @@ def _compress_video(inp: Path, crf: int = 28, preset: str = "fast") -> Path:
355
 
356
 
357
  def _maybe_compress(path: Path, limit_mb: int) -> Tuple[Path, bool]:
358
- """Compress *path* if its size exceeds *limit_mb*."""
359
  size_mb = path.stat().st_size / (1024 * 1024)
360
  if size_mb <= limit_mb:
361
  return path, False
@@ -363,7 +99,7 @@ def _maybe_compress(path: Path, limit_mb: int) -> Tuple[Path, bool]:
363
 
364
 
365
  def _download_direct(url: str, dst: Path) -> Path:
366
- """Download a raw video file via HTTP GET."""
367
  r = requests.get(url, stream=True, timeout=30)
368
  r.raise_for_status()
369
  out = dst / _sanitize_filename(url.split("/")[-1])
@@ -375,9 +111,8 @@ def _download_direct(url: str, dst: Path) -> Path:
375
 
376
 
377
  def _download_with_yt_dlp(url: str, dst: Path, password: str = "") -> Path:
378
- """Download via yt‑dlp, ensuring the complete file is retrieved."""
379
  tmpl = str(dst / "%(id)s.%(ext)s")
380
- # Prefer a full‑container MP4; fall back to the best available format.
381
  fmt = "best[ext=mp4]/best"
382
 
383
  opts = {
@@ -386,8 +121,8 @@ def _download_with_yt_dlp(url: str, dst: Path, password: str = "") -> Path:
386
  "quiet": True,
387
  "noprogress": True,
388
  "nocheckcertificate": True,
389
- "merge_output_format": "mp4", # force a single MP4 file
390
- "fragment_retries": 0, # avoid fragmented downloads
391
  }
392
  if password:
393
  opts["videopassword"] = password
@@ -412,38 +147,27 @@ def _download_with_yt_dlp(url: str, dst: Path, password: str = "") -> Path:
412
  try:
413
  with yt_dlp.YoutubeDL(opts) as ydl:
414
  ydl.extract_info(url, download=True)
415
- except Exception as e:
416
- raise RuntimeError(f"yt‑dlp could not download the URL: {e}") from e
417
  finally:
418
  progress_bar.empty()
419
  status_text.empty()
420
 
421
- # yt‑dlp may have produced several files; pick the newest MP4
422
  mp4_files = list(dst.glob("*.mp4"))
423
  if not mp4_files:
424
  raise RuntimeError("No MP4 file was created.")
425
  newest = max(mp4_files, key=lambda p: p.stat().st_mtime)
426
 
427
- # Optional cache: if a file with the same SHA‑256 already exists, reuse it
428
  sha = _file_sha256(newest)
429
  if sha:
430
  for existing in dst.iterdir():
431
  if existing != newest and _file_sha256(existing) == sha:
432
- newest.unlink() # remove duplicate
433
  return existing
434
-
435
  return newest
436
 
437
 
438
  def download_video(url: str, dst: Path, password: str = "") -> Path:
439
- """
440
- Download a video from *url* and return an MP4 path.
441
- Strategy
442
- ---------
443
- 1. Direct video URL → HTTP GET.
444
- 2. Twitter status → scrape for embedded video URLs.
445
- 3. yt‑dlp fallback for everything else.
446
- """
447
  video_exts = (".mp4", ".mov", ".webm", ".mkv", ".avi", ".flv")
448
 
449
  if url.lower().endswith(video_exts):
@@ -460,22 +184,16 @@ def download_video(url: str, dst: Path, password: str = "") -> Path:
460
  return download_video(u.expandedUrl, dst)
461
  raise RuntimeError("No video found in the tweet.")
462
 
463
- # Fallback to yt‑dlp for any other URL
464
  return _download_with_yt_dlp(url, dst, password)
465
 
466
 
467
  def _encode_video_b64(path: Path) -> str:
468
- """Read *path* and return a base64‑encoded string."""
469
  return base64.b64encode(path.read_bytes()).decode()
470
 
471
 
472
- def generate_report(
473
- video_path: Path,
474
- prompt: str,
475
- model_id: str,
476
- timeout: int = 300,
477
- ) -> str:
478
- """Send video + prompt to Gemini and return the text response."""
479
  b64 = _encode_video_b64(video_path)
480
  video_part = {"inline_data": {"mime_type": "video/mp4", "data": b64}}
481
  model = genai.GenerativeModel(model_name=model_id)
@@ -489,13 +207,11 @@ def generate_report(
489
 
490
 
491
  def _strip_prompt_echo(prompt: str, text: str, threshold: float = 0.68) -> str:
492
- """Remove the prompt if the model repeats it at the start of *text*."""
493
  if not prompt or not text:
494
  return text
495
-
496
  clean_prompt = " ".join(prompt.lower().split())
497
  snippet = " ".join(text.lower().split()[:600])
498
-
499
  if SequenceMatcher(None, clean_prompt, snippet).ratio() > threshold:
500
  cut = max(len(clean_prompt), int(len(prompt) * 0.9))
501
  return text[cut:].lstrip(" \n:-")
@@ -506,7 +222,7 @@ def _strip_prompt_echo(prompt: str, text: str, threshold: float = 0.68) -> str:
506
  # UI helpers
507
  # ----------------------------------------------------------------------
508
  def _expand_sidebar(width: int = 380) -> None:
509
- """Inject CSS to make the sidebar wider."""
510
  st.markdown(
511
  f"""
512
  <style>
@@ -519,72 +235,41 @@ def _expand_sidebar(width: int = 380) -> None:
519
  unsafe_allow_html=True,
520
  )
521
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
522
 
523
  # ----------------------------------------------------------------------
524
- # Streamlit UI
525
  # ----------------------------------------------------------------------
526
- def main() -> None:
527
- st.set_page_config(page_title="Video Analysis", layout="wide")
528
- _expand_sidebar()
529
-
530
- # ---------- Sidebar ----------
531
- st.sidebar.header("Video Input")
532
- st.sidebar.text_input("Video URL", key="url", placeholder="https://")
533
-
534
- if st.sidebar.button("Load Video"):
535
- try:
536
- with st.spinner("Downloading video…"):
537
- raw_path = download_video(
538
- st.session_state["url"], DATA_DIR, st.session_state["video_password"]
539
- )
540
- mp4_path = _convert_to_mp4(Path(raw_path))
541
- st.session_state["video_path"] = str(mp4_path)
542
- st.session_state["last_error"] = ""
543
- st.toast("Video ready")
544
- st.experimental_rerun()
545
- except Exception as e:
546
- st.session_state["last_error"] = f"Download failed: {e}"
547
- st.sidebar.error(st.session_state["last_error"])
548
-
549
- # ---------- Settings ----------
550
- with st.sidebar.expander("Settings", expanded=False):
551
- model = st.selectbox(
552
- "Model", MODEL_OPTIONS, index=MODEL_OPTIONS.index(DEFAULT_MODEL)
553
- )
554
- if model == "custom":
555
- model = st.text_input("Custom model ID", value=DEFAULT_MODEL, key="custom_model")
556
- st.session_state["model_input"] = model
557
-
558
- # API key handling
559
- secret_key = os.getenv("GOOGLE_API_KEY", "")
560
- if secret_key:
561
- st.session_state["api_key"] = secret_key
562
- st.text_input("Google API Key", key="api_key", type="password")
563
-
564
- st.text_area(
565
- "Analysis prompt",
566
- value=DEFAULT_PROMPT,
567
- key="prompt",
568
- height=140,
569
- )
570
- st.text_input(
571
- "Video password (if needed)",
572
- key="video_password",
573
- type="password",
574
- )
575
- st.number_input(
576
- "Compress if > (MB)",
577
- min_value=10,
578
- max_value=2000,
579
- value=st.session_state.get("compress_mb", 200),
580
- step=10,
581
- key="compress_mb",
582
- )
583
 
584
- if st.sidebar.button("Clear Video"):
585
- for f in DATA_DIR.iterdir():
586
- try:
587
- f.unlink()
588
- except Exception:
589
- pass
590
- st
 
2
  # -*- coding: utf-8 -*-
3
 
4
  """
5
+ Video‑analysis Streamlit app (refactored & fixed).
6
  """
7
 
8
  # ----------------------------------------------------------------------
 
26
  DATA_DIR = Path("./data")
27
  DATA_DIR.mkdir(exist_ok=True)
28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  DEFAULT_MODEL = "gemini-2.0-flash-lite"
 
30
  DEFAULT_PROMPT = (
31
  "Watch the video and provide a detailed behavioral report focusing on human actions, "
32
  "interactions, posture, movement, and apparent intent. Keep language professional. "
33
  "Include a list of observations for notable events."
34
  )
35
 
36
+ MODEL_OPTIONS = [
37
+ "gemini-1.5-pro",
38
+ "gemini-1.5-flash",
39
+ "gemini-2.0-flash-lite",
40
+ "custom",
41
+ ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
 
43
  # ----------------------------------------------------------------------
44
  # Helper utilities
45
  # ----------------------------------------------------------------------
46
  def _sanitize_filename(url: str) -> str:
47
+ """Make a safe filename from a URL."""
48
  name = Path(url).name.lower()
49
  return name.translate(str.maketrans("", "", string.punctuation)).replace(" ", "_")
50
 
51
 
52
  def _file_sha256(path: Path) -> Optional[str]:
53
+ """Return SHA‑256 hex digest of *path* or None on error."""
54
  try:
55
  h = hashlib.sha256()
56
  with path.open("rb") as f:
 
62
 
63
 
64
  def _convert_to_mp4(src: Path) -> Path:
65
+ """Convert *src* to MP4 (ffmpeg) and delete the original."""
66
  dst = src.with_suffix(".mp4")
67
  if dst.exists():
68
  return dst
 
79
 
80
 
81
  def _compress_video(inp: Path, crf: int = 28, preset: str = "fast") -> Path:
82
+ """Compress *inp* with libx264; return the new file."""
83
  out = inp.with_name(f"{inp.stem}_compressed.mp4")
84
  try:
85
  ffmpeg.input(str(inp)).output(
 
91
 
92
 
93
  def _maybe_compress(path: Path, limit_mb: int) -> Tuple[Path, bool]:
94
+ """Compress *path* if it exceeds *limit_mb*."""
95
  size_mb = path.stat().st_size / (1024 * 1024)
96
  if size_mb <= limit_mb:
97
  return path, False
 
99
 
100
 
101
  def _download_direct(url: str, dst: Path) -> Path:
102
+ """Simple HTTP GET download."""
103
  r = requests.get(url, stream=True, timeout=30)
104
  r.raise_for_status()
105
  out = dst / _sanitize_filename(url.split("/")[-1])
 
111
 
112
 
113
  def _download_with_yt_dlp(url: str, dst: Path, password: str = "") -> Path:
114
+ """Download via yt‑dlp, returning the newest MP4."""
115
  tmpl = str(dst / "%(id)s.%(ext)s")
 
116
  fmt = "best[ext=mp4]/best"
117
 
118
  opts = {
 
121
  "quiet": True,
122
  "noprogress": True,
123
  "nocheckcertificate": True,
124
+ "merge_output_format": "mp4",
125
+ "fragment_retries": 0,
126
  }
127
  if password:
128
  opts["videopassword"] = password
 
147
  try:
148
  with yt_dlp.YoutubeDL(opts) as ydl:
149
  ydl.extract_info(url, download=True)
 
 
150
  finally:
151
  progress_bar.empty()
152
  status_text.empty()
153
 
 
154
  mp4_files = list(dst.glob("*.mp4"))
155
  if not mp4_files:
156
  raise RuntimeError("No MP4 file was created.")
157
  newest = max(mp4_files, key=lambda p: p.stat().st_mtime)
158
 
159
+ # Deduplicate via SHA‑256 cache
160
  sha = _file_sha256(newest)
161
  if sha:
162
  for existing in dst.iterdir():
163
  if existing != newest and _file_sha256(existing) == sha:
164
+ newest.unlink()
165
  return existing
 
166
  return newest
167
 
168
 
169
  def download_video(url: str, dst: Path, password: str = "") -> Path:
170
+ """Unified download entry point."""
 
 
 
 
 
 
 
171
  video_exts = (".mp4", ".mov", ".webm", ".mkv", ".avi", ".flv")
172
 
173
  if url.lower().endswith(video_exts):
 
184
  return download_video(u.expandedUrl, dst)
185
  raise RuntimeError("No video found in the tweet.")
186
 
 
187
  return _download_with_yt_dlp(url, dst, password)
188
 
189
 
190
  def _encode_video_b64(path: Path) -> str:
191
+ """Base64‑encode a file."""
192
  return base64.b64encode(path.read_bytes()).decode()
193
 
194
 
195
+ def generate_report(video_path: Path, prompt: str, model_id: str, timeout: int = 300) -> str:
196
+ """Send video + prompt to Gemini and return the response text."""
 
 
 
 
 
197
  b64 = _encode_video_b64(video_path)
198
  video_part = {"inline_data": {"mime_type": "video/mp4", "data": b64}}
199
  model = genai.GenerativeModel(model_name=model_id)
 
207
 
208
 
209
  def _strip_prompt_echo(prompt: str, text: str, threshold: float = 0.68) -> str:
210
+ """Trim the prompt if the model repeats it at the start."""
211
  if not prompt or not text:
212
  return text
 
213
  clean_prompt = " ".join(prompt.lower().split())
214
  snippet = " ".join(text.lower().split()[:600])
 
215
  if SequenceMatcher(None, clean_prompt, snippet).ratio() > threshold:
216
  cut = max(len(clean_prompt), int(len(prompt) * 0.9))
217
  return text[cut:].lstrip(" \n:-")
 
222
  # UI helpers
223
  # ----------------------------------------------------------------------
224
  def _expand_sidebar(width: int = 380) -> None:
225
+ """Make the Streamlit sidebar wider."""
226
  st.markdown(
227
  f"""
228
  <style>
 
235
  unsafe_allow_html=True,
236
  )
237
 
238
+ # ----------------------------------------------------------------------
239
+ # Session‑state defaults
240
+ # ----------------------------------------------------------------------
241
+ def _init_state() -> None:
242
+ """Populate Streamlit's session_state with sensible defaults."""
243
+ defaults = {
244
+ "url": "",
245
+ "video_path": "",
246
+ "model_input": DEFAULT_MODEL,
247
+ "prompt": DEFAULT_PROMPT,
248
+ "api_key": os.getenv("GOOGLE_API_KEY", "AIzaSyBiAW2GQLid0HGe9Vs_ReKwkwsSVNegNzs"),
249
+ "video_password": "",
250
+ "compress_mb": 200,
251
+ "busy": False,
252
+ "last_error": "",
253
+ "analysis_out": "",
254
+ "raw_output": "",
255
+ "last_error_detail": "",
256
+ "show_raw_on_error": False,
257
+ "show_analysis": False,
258
+ }
259
+ for k, v in defaults.items():
260
+ st.session_state.setdefault(k, v)
261
+
262
 
263
  # ----------------------------------------------------------------------
264
+ # Main entry point
265
  # ----------------------------------------------------------------------
266
+ if __name__ == "__main__":
267
+ # Initialise session state before any UI code runs
268
+ _init_state()
269
+
270
+ # Initialise the Gemini API – the key can be supplied via the sidebar or env var
271
+ if st.session_state["api_key"]:
272
+ genai.configure(api_key=st.session_state["api_key"])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
273
 
274
+ # Run the Streamlit app
275
+ main()