Hug0endob committed on
Commit
fc82f2f
·
verified ·
1 Parent(s): a600119

Update streamlit_app.py

Browse files
Files changed (1) hide show
  1. streamlit_app.py +328 -295
streamlit_app.py CHANGED
@@ -1,24 +1,32 @@
1
- # streamlit_app.py
2
- import requests # for direct‑video download
3
- import snscrape.modules.twitter as sntwitter # Twitter scraper
4
  import base64
5
  import hashlib
6
- import os
7
  import string
8
  import traceback
9
- from glob import glob
10
  from pathlib import Path
 
11
 
 
 
12
  import ffmpeg
13
  import google.generativeai as genai
14
- import streamlit as st
15
  import yt_dlp
16
- from dotenv import load_dotenv
17
 
18
- load_dotenv()
 
 
 
 
 
 
 
 
 
 
19
 
20
  # ----------------------------------------------------------------------
21
- # Configuration & constants
22
  # ----------------------------------------------------------------------
23
  DATA_DIR = Path("./data")
24
  DATA_DIR.mkdir(exist_ok=True)
@@ -37,11 +45,8 @@ DEFAULT_PROMPT = (
37
  "Include a list of observations for notable events."
38
  )
39
 
40
- # ----------------------------------------------------------------------
41
- # Session defaults
42
- # ----------------------------------------------------------------------
43
- st.set_page_config(page_title="Video Analysis Tool", layout="wide")
44
- for key, val in {
45
  "url": "",
46
  "videos": "",
47
  "loop_video": False,
@@ -55,18 +60,24 @@ for key, val in {
55
  "processing_timeout": 900,
56
  "generation_timeout": 300,
57
  "compress_threshold_mb": 200,
58
- }.items():
59
- st.session_state.setdefault(key, val)
 
60
 
61
  # ----------------------------------------------------------------------
62
  # Helper utilities
63
  # ----------------------------------------------------------------------
 
 
 
64
  def sanitize_filename(p: str) -> str:
 
65
  name = Path(p).name.lower()
66
  return name.translate(str.maketrans("", "", string.punctuation)).replace(" ", "_")
67
 
68
 
69
- def file_sha256(p: str, block: int = 65536) -> str | None:
 
70
  try:
71
  h = hashlib.sha256()
72
  with open(p, "rb") as f:
@@ -78,73 +89,126 @@ def file_sha256(p: str, block: int = 65536) -> str | None:
78
 
79
 
80
  def convert_to_mp4(src: str) -> str:
 
81
  dst = str(Path(src).with_suffix(".mp4"))
82
  if os.path.exists(dst):
83
  return dst
84
- ffmpeg.input(src).output(dst).run(overwrite_output=True, quiet=True)
85
- if os.path.exists(dst):
 
 
 
 
 
 
 
86
  os.remove(src)
87
  return dst
88
 
89
 
90
  def compress_video(inp: str, out: str, crf: int = 28, preset: str = "fast") -> str:
91
- ffmpeg.input(inp).output(out, vcodec="libx264", crf=crf, preset=preset).run(
92
- overwrite_output=True, quiet=True
93
- )
 
 
 
 
94
  return out if os.path.exists(out) else inp
95
 
96
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97
  def download_video(url: str, dst_dir: str, password: str = "") -> str:
98
  """
99
  Download a video from *url*.
100
- - If the URL ends with a known video extension → plain HTTP download.
101
- - If it is a YouTube / archive URL → try yt‑dlp (will raise a clear error if DNS fails).
102
- - If it is a Twitter status URL → scrape the tweet for the first video URL.
 
103
  """
104
  video_exts = (".mp4", ".mov", ".webm", ".mkv", ".avi", ".flv")
 
105
 
106
  # --------------------------------------------------------------
107
- # 1️⃣ Direct video file (no yt‑dlp needed)
108
  # --------------------------------------------------------------
109
  if url.lower().endswith(video_exts):
110
  try:
111
  r = requests.get(url, stream=True, timeout=30)
112
  r.raise_for_status()
113
  filename = sanitize_filename(url.split("/")[-1])
114
- out_path = Path(dst_dir) / filename
115
  with open(out_path, "wb") as f:
116
  for chunk in r.iter_content(chunk_size=8192):
117
  if chunk:
118
  f.write(chunk)
119
- return str(out_path) # already an MP4 (or other supported format)
120
  except Exception as e:
121
- raise RuntimeError(f"Direct download failed: {e}")
122
 
123
  # --------------------------------------------------------------
124
- # 2️⃣ Twitter status – scrape for video URL
125
  # --------------------------------------------------------------
126
  if "twitter.com" in url and "/status/" in url:
127
  try:
128
  tweet_id = url.split("/")[-1].split("?")[0]
129
  for tweet in sntwitter.TwitterTweetScraper(tweet_id).get_items():
130
- # Look for media URLs in the tweet JSON
131
- media = tweet.media
132
- if media:
133
- for m in media:
134
  if getattr(m, "video_url", None):
135
- return download_video(m.video_url, dst_dir) # recurse as a direct URL
136
- # If no media field, fall back to expanded URLs
137
- for u in tweet.urls:
138
  if u.expandedUrl.lower().endswith(video_exts):
139
- return download_video(u.expandedUrl, dst_dir)
140
  raise RuntimeError("No video found in the tweet.")
141
  except Exception as e:
142
- raise RuntimeError(f"Twitter scrape failed: {e}")
143
 
144
  # --------------------------------------------------------------
145
- # 3️⃣ Fallback to yt‑dlp (YouTube, archive.org, etc.)
146
  # --------------------------------------------------------------
147
- tmpl = str(Path(dst_dir) / "%(id)s.%(ext)s")
148
  opts = {"outtmpl": tmpl, "format": "best"}
149
  if password:
150
  opts["videopassword"] = password
@@ -153,282 +217,251 @@ def download_video(url: str, dst_dir: str, password: str = "") -> str:
153
  with yt_dlp.YoutubeDL(opts) as ydl:
154
  info = ydl.extract_info(url, download=True)
155
  except Exception as e:
156
- # Re‑raise with a short, user‑friendly message
157
  raise RuntimeError(
158
  f"yt‑dlp could not download the URL. "
159
  f"Common reasons: DNS failure, unsupported site, or missing video. "
160
  f"Original error: {e}"
161
- )
162
 
163
- # Locate the downloaded file (same logic as before)
164
- if isinstance(info, dict) and info.get("id"):
165
  vid_id = info["id"]
166
  ext = info.get("ext", "mp4")
167
- candidate = str(Path(dst_dir) / f"{vid_id}.{ext}")
168
- if os.path.exists(candidate):
169
- return convert_to_mp4(candidate)
170
-
171
- # Fallback: newest file in the folder
172
- newest = max(glob(os.path.join(dst_dir, "*")), key=os.path.getmtime)
173
- return convert_to_mp4(newest)
174
-
175
-
176
- def maybe_compress(path: str, limit_mb: int) -> str:
177
- size_mb = os.path.getsize(path) / (1024 * 1024)
178
- if size_mb <= limit_mb:
179
- return path
180
- out = str(Path(path).with_name(f"{Path(path).stem}_compressed.mp4"))
181
- return compress_video(path, out)
182
 
183
-
184
- def strip_prompt_echo(prompt: str, text: str) -> str:
185
- if not prompt or not text:
186
- return text
187
- clean_prompt = " ".join(prompt.lower().split())
188
- snippet = " ".join(text.lower().split()[:600])
189
- from difflib import SequenceMatcher
190
-
191
- if SequenceMatcher(None, clean_prompt, snippet).ratio() > 0.68:
192
- cut = max(len(clean_prompt), int(len(prompt) * 0.9))
193
- return text[cut:].lstrip(" \n:-")
194
- return text
195
-
196
-
197
- def generate_inline(video_path: str, prompt: str, model_id: str, timeout: int) -> str:
198
- # 1️⃣ read video and encode as base64
199
- with open(video_path, "rb") as f:
200
- b64 = base64.b64encode(f.read()).decode()
201
-
202
- # 2️⃣ build the “inline” part that the Gemini API expects
203
- video_part = {
204
- "inline_data": {"mime_type": "video/mp4", "data": b64}
205
- }
206
- contents = [prompt, video_part]
207
-
208
- # 3️⃣ create a GenerativeModel object (no Client class any more)
209
- model = genai.GenerativeModel(model_name=model_id)
210
-
211
- # 4️⃣ call generate_content – the timeout is passed via the request options
212
- resp = model.generate_content(
213
- contents,
214
- generation_config={"max_output_tokens": 1024},
215
- # `timeout` is a kw‑arg of the underlying HTTP request; the SDK forwards it
216
- request_options={"timeout": timeout},
217
- )
218
-
219
- # 5️⃣ the response object has a `.text` attribute (or `.parts` for multi‑part)
220
- return getattr(resp, "text", str(resp))
221
 
222
 
223
  # ----------------------------------------------------------------------
224
- # Sidebar – inputs & settings
225
  # ----------------------------------------------------------------------
226
- st.sidebar.header("Video Input")
227
- st.sidebar.text_input("Video URL", key="url", placeholder="https://")
228
-
229
- with st.sidebar.expander("Settings", expanded=False):
230
- model = st.selectbox("Model", MODEL_OPTIONS, index=MODEL_OPTIONS.index(DEFAULT_MODEL))
231
- if model == "custom":
232
- model = st.text_input("Custom model ID", value=DEFAULT_MODEL, key="custom_model")
233
- st.session_state["model_input"] = model
234
-
235
- st.text_input("Google API Key", key="api_key", type="password")
236
- st.text_area("Analysis prompt", value=DEFAULT_PROMPT, key="prompt", height=140)
237
- st.text_input("Video password (if needed)", key="video_password", type="password")
238
- st.number_input(
239
- "Processing timeout (s)",
240
- min_value=60,
241
- max_value=3600,
242
- value=st.session_state["processing_timeout"],
243
- step=30,
244
- key="processing_timeout",
245
- )
246
- st.number_input(
247
- "Generation timeout (s)",
248
- min_value=30,
249
- max_value=1800,
250
- value=st.session_state["generation_timeout"],
251
- step=10,
252
- key="generation_timeout",
253
- )
254
- st.number_input(
255
- "Compress if > (MB)",
256
- min_value=10,
257
- max_value=2000,
258
- value=st.session_state["compress_threshold_mb"],
259
- step=10,
260
- key="compress_threshold_mb",
261
- )
262
 
263
- # ----------------------------------------------------------------------
264
- # Load / preview video
265
- # ----------------------------------------------------------------------
266
- if st.sidebar.button("Load Video"):
267
- try:
268
- path = download_video(
269
- st.session_state["url"], str(DATA_DIR), st.session_state["video_password"]
270
- )
271
- st.session_state["videos"] = path
272
- st.session_state["last_error"] = ""
273
- except Exception as e:
274
- st.session_state["last_error"] = f"Download failed: {e}"
275
- st.sidebar.error(st.session_state["last_error"])
276
-
277
- # --------------------------------------------------------------
278
- # 📥 Twitter video extractor – hidden inside an expander
279
- # --------------------------------------------------------------
280
- with st.sidebar.expander("🔎 Extract video(s) from a Tweet", expanded=False):
281
- tweet_url = st.text_input(
282
- "Tweet URL (e.g. https://twitter.com/user/status/1234567890)",
283
- key="tweet_url",
284
- )
285
 
286
- # ------------------------------------------------------------------
287
- # Step 1 – Scrape the tweet and collect every video URL
288
- # ------------------------------------------------------------------
289
- if st.button("Find videos in tweet"):
290
- if not tweet_url:
291
- st.error("Paste a tweet URL first.")
292
- else:
293
- try:
294
- tweet_id = tweet_url.split("/")[-1].split("?")[0]
295
- video_urls: list[str] = []
296
-
297
- for tweet in sntwitter.TwitterTweetScraper(tweet_id).get_items():
298
- # media objects (video, gif, etc.)
299
- if getattr(tweet, "media", None):
300
- for m in tweet.media:
301
- if getattr(m, "video_url", None):
302
- video_urls.append(m.video_url)
303
-
304
- # expanded URLs that already end with a video extension
305
- for u in getattr(tweet, "urls", []):
306
- if u.expandedUrl.lower().endswith(
307
- (".mp4", ".mov", ".webm", ".mkv", ".avi", ".flv")
308
- ):
309
- video_urls.append(u.expandedUrl)
310
-
311
- # deduplicate while preserving order
312
- video_urls = list(dict.fromkeys(video_urls))
313
-
314
- if not video_urls:
315
- raise RuntimeError("No video URLs detected in this tweet.")
316
-
317
- # store a friendly label + raw URL for the selector
318
- st.session_state["tweet_video_options"] = [
319
- (f"Video {i+1} – {url.split('/')[-1][:30]}...", url)
320
- for i, url in enumerate(video_urls)
321
- ]
322
- st.success(f"Found {len(video_urls)} video(s). Choose one below.")
323
- except Exception as e:
324
- st.session_state["tweet_video_options"] = []
325
- st.error(f"Tweet scrape failed: {e}")
326
-
327
- # ------------------------------------------------------------------
328
- # Step 2 – Let the user pick one of the discovered videos
329
- # ------------------------------------------------------------------
330
- if st.session_state.get("tweet_video_options"):
331
- display_names, raw_urls = zip(*st.session_state["tweet_video_options"])
332
- sel_idx = st.selectbox(
333
- "Select video to download",
334
- options=range(len(display_names)),
335
- format_func=lambda i: display_names[i],
336
- key="tweet_video_select",
337
  )
 
 
 
 
 
338
 
339
- # ------------------------------------------------------------------
340
- # Step 3 – Download the chosen video
341
- # ------------------------------------------------------------------
342
- if st.button("Download selected video"):
343
- chosen_url = raw_urls[sel_idx]
344
- try:
345
- path = download_video(chosen_url, str(DATA_DIR))
346
- st.session_state["videos"] = path
347
- st.session_state["last_error"] = ""
348
- st.success("Video downloaded and loaded.")
349
- except Exception as e:
350
- st.session_state["last_error"] = f"Download failed: {e}"
351
- st.error(st.session_state["last_error"])
352
- else:
353
- st.info(
354
- "Paste a tweet URL and click **Find videos in tweet** to discover available videos."
355
- )
356
 
357
- if st.session_state["videos"]:
358
- try:
359
- mp4 = convert_to_mp4(st.session_state["videos"])
360
- with open(mp4, "rb") as f:
361
- video_bytes = f.read()
362
- st.sidebar.video(video_bytes, format="video/mp4")
363
- except Exception:
364
- st.sidebar.write("Preview unavailable")
365
 
366
- if st.sidebar.button("Clear Video"):
367
- for f in glob(str(DATA_DIR / "*")):
368
- try:
369
- os.remove(f)
370
- except Exception:
371
- pass
372
- st.session_state.update(
373
- {
374
- "videos": "",
375
- "analysis_out": "",
376
- "last_error": "",
377
- "busy": False,
378
- }
 
 
 
 
 
 
 
 
 
 
379
  )
380
 
381
- # ----------------------------------------------------------------------
382
- # Generation
383
- # ----------------------------------------------------------------------
384
- col1, col2 = st.columns([1, 3])
385
- with col1:
386
- generate_now = st.button("Generate analysis", type="primary")
387
- with col2:
388
- if not st.session_state["videos"]:
389
- st.info("Load a video first.", icon="ℹ️")
390
-
391
- if generate_now and not st.session_state["busy"]:
392
- if not st.session_state["videos"]:
393
- st.error("No video loaded.")
394
- elif not (st.session_state["api_key"] or os.getenv("GOOGLE_API_KEY")):
395
- st.error("Google API key missing.")
396
- else:
397
- st.session_state["busy"] = True
398
  try:
399
- genai.configure(api_key=st.session_state["api_key"] or os.getenv("GOOGLE_API_KEY"))
400
- model_id = st.session_state["model_input"]
401
- prompt = st.session_state["prompt"]
402
-
403
- # Optional compression to stay under inline limits
404
- video_path = maybe_compress(
405
- st.session_state["videos"], st.session_state["compress_threshold_mb"]
406
- )
407
-
408
- with st.spinner("Generating…"):
409
- raw_out = generate_inline(
410
- video_path, prompt, model_id, st.session_state["generation_timeout"]
411
  )
412
- out = strip_prompt_echo(prompt, raw_out)
413
- st.session_state["analysis_out"] = out
414
- st.subheader("Analysis Result")
415
- st.markdown(out or "No output.")
416
  except Exception as e:
417
- # Capture full traceback for debugging while showing a short message to the user
418
- tb = traceback.format_exc()
419
- st.session_state["last_error"] = f"Generation error: {e}"
420
- st.error("An error occurred during generation. Check the log below.")
421
- st.code(tb, language="text")
422
- finally:
423
- st.session_state["busy"] = False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
424
 
425
- # ----------------------------------------------------------------------
426
- # Show results / errors
427
- # ----------------------------------------------------------------------
428
- if st.session_state["analysis_out"]:
429
- st.subheader("📝 Analysis")
430
- st.markdown(st.session_state["analysis_out"])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
431
 
432
- if st.session_state["last_error"]:
433
- st.subheader("❗️ Error log")
434
- st.code(st.session_state["last_error"], language="text")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ import os
 
3
  import base64
4
  import hashlib
 
5
  import string
6
  import traceback
 
7
  from pathlib import Path
8
+ from typing import List, Tuple, Optional
9
 
10
+ import requests
11
+ import streamlit as st
12
  import ffmpeg
13
  import google.generativeai as genai
 
14
  import yt_dlp
 
15
 
16
+ # ----------------------------------------------------------------------
17
+ # Optional imports – give a clear message if missing
18
+ # ----------------------------------------------------------------------
19
+ try:
20
+ import snscrape.modules.twitter as sntwitter
21
+ except ImportError: # pragma: no cover
22
+ st.error(
23
+ "The package `snscrape` is required for Twitter video extraction. "
24
+ "Install it with `pip install snscrape`."
25
+ )
26
+ st.stop()
27
 
28
  # ----------------------------------------------------------------------
29
+ # Configuration & defaults
30
  # ----------------------------------------------------------------------
31
  DATA_DIR = Path("./data")
32
  DATA_DIR.mkdir(exist_ok=True)
 
45
  "Include a list of observations for notable events."
46
  )
47
 
48
+ # Session‑state defaults (kept in one dict for readability)
49
+ DEFAULT_SESSION_STATE = {
 
 
 
50
  "url": "",
51
  "videos": "",
52
  "loop_video": False,
 
60
  "processing_timeout": 900,
61
  "generation_timeout": 300,
62
  "compress_threshold_mb": 200,
63
+ }
64
+ for k, v in DEFAULT_SESSION_STATE.items():
65
+ st.session_state.setdefault(k, v)
66
 
67
  # ----------------------------------------------------------------------
68
  # Helper utilities
69
  # ----------------------------------------------------------------------
70
+ from difflib import SequenceMatcher # imported once for clarity
71
+
72
+
73
def sanitize_filename(p: str) -> str:
    """Return a lower-case, punctuation-free file name derived from *p*.

    Only the final path component of *p* is used.  Punctuation is removed
    and spaces become underscores in both the stem and the extension, but
    the dot separating them is kept so the result still carries its file
    type (``"My Video.MP4"`` -> ``"my_video.mp4"``).  Stripping that dot as
    well would turn ``video.mp4`` into the extension-less ``videomp4``.

    Args:
        p: A path or URL fragment; directories are ignored.

    Returns:
        The sanitized file name, without any directory part.
    """
    path = Path(p)
    drop_punctuation = str.maketrans("", "", string.punctuation)

    def _clean(part: str) -> str:
        # One C-level pass removes every punctuation character.
        return part.lower().translate(drop_punctuation).replace(" ", "_")

    stem = _clean(path.stem)
    # ``path.suffix`` includes the leading dot, which _clean() strips; it is
    # re-added below only when an extension actually remains.
    ext = _clean(path.suffix)
    return f"{stem}.{ext}" if ext else stem
77
 
78
 
79
+ def file_sha256(p: str, block: int = 65536) -> Optional[str]:
80
+ """SHA‑256 hash of a file; returns None on error."""
81
  try:
82
  h = hashlib.sha256()
83
  with open(p, "rb") as f:
 
89
 
90
 
91
def convert_to_mp4(src: str) -> str:
    """Convert *src* to MP4 with ffmpeg and return the MP4 path.

    The output path is *src* with its suffix replaced by ``.mp4``.  If that
    file already exists (including the case where *src* is itself an MP4)
    it is returned unchanged and ffmpeg is not invoked.

    Args:
        src: Path of the video file to convert.

    Returns:
        Path of the MP4 file.

    Raises:
        RuntimeError: If ffmpeg fails or produces no usable output.  Any
            partial output is removed first so that a later call does not
            mistake it for a finished conversion.
    """
    dst = str(Path(src).with_suffix(".mp4"))
    if os.path.exists(dst):
        return dst

    def _discard_output() -> None:
        # Best-effort removal of a partial/empty output file.
        try:
            os.remove(dst)
        except OSError:
            pass

    try:
        ffmpeg.input(src).output(dst).overwrite_output().run(
            capture_stdout=True, capture_stderr=True
        )
    except ffmpeg.Error as e:
        _discard_output()
        # stderr may be missing or not valid UTF-8; never let decoding mask
        # the real ffmpeg failure.
        detail = (e.stderr or b"").decode(errors="replace")
        raise RuntimeError(f"ffmpeg conversion failed: {detail}") from e

    if not os.path.exists(dst) or os.path.getsize(dst) == 0:
        _discard_output()
        raise RuntimeError("ffmpeg conversion failed: no output was produced")

    # Delete the source only once a non-empty output is confirmed.
    os.remove(src)
    return dst
107
 
108
 
109
def compress_video(inp: str, out: str, crf: int = 28, preset: str = "fast") -> str:
    """Re-encode *inp* to *out* with libx264.

    Args:
        inp: Path of the source video.
        out: Path the compressed video is written to.
        crf: x264 constant rate factor passed to ffmpeg.
        preset: x264 preset name passed to ffmpeg.

    Returns:
        *out* if it exists after the run, otherwise *inp*.

    Raises:
        RuntimeError: If ffmpeg reports an error.  A partially written
            *out* is removed before raising.
    """
    try:
        ffmpeg.input(inp).output(
            out, vcodec="libx264", crf=crf, preset=preset
        ).overwrite_output().run(capture_stdout=True, capture_stderr=True)
    except ffmpeg.Error as e:
        # Do not leave a truncated file behind (and never touch the input).
        if out != inp:
            try:
                os.remove(out)
            except OSError:
                pass
        # stderr may be missing or not valid UTF-8.
        detail = (e.stderr or b"").decode(errors="replace")
        raise RuntimeError(f"ffmpeg compression failed: {detail}") from e
    return out if os.path.exists(out) else inp
118
 
119
 
120
def maybe_compress(path: str, limit_mb: int) -> Tuple[str, bool]:
    """Compress *path* if its size exceeds *limit_mb* megabytes.

    Args:
        path: Path of the video file.
        limit_mb: Size threshold in MiB; files at or below it are untouched.

    Returns:
        ``(final_path, was_compressed)``.  ``was_compressed`` is True only
        when a new compressed file was actually produced.  If
        ``compress_video`` falls back to returning the input path, the flag
        is False so callers that delete the "temporary" compressed file
        never delete the original video.
    """
    size_mb = os.path.getsize(path) / (1024 * 1024)
    if size_mb <= limit_mb:
        return path, False
    out = str(Path(path).with_name(f"{Path(path).stem}_compressed.mp4"))
    final_path = compress_video(path, out)
    return final_path, final_path != path
128
+
129
+
130
def strip_prompt_echo(prompt: str, text: str, threshold: float = 0.68) -> str:
    """Remove the prompt if the model repeats it at the start of *text*.

    The prompt is compared (case-insensitively, whitespace-normalized) with
    the leading ``len(prompt)`` characters of *text*.  Comparing against
    only that prefix matters: measured against the whole answer, an echoed
    prompt followed by a long reply would always score below *threshold*
    and never be stripped.

    Args:
        prompt: The prompt that was sent to the model.
        text: The model output.
        threshold: Minimum ``SequenceMatcher`` ratio (0-1) for the prefix
            to count as an echo.

    Returns:
        *text* with the echoed prompt and any leading separators
        (space, newline, ``:``, ``-``) removed, or *text* unchanged.
    """
    if not prompt or not text:
        return text
    clean_prompt = " ".join(prompt.lower().split())
    if not clean_prompt:
        # Whitespace-only prompt: nothing that could have been echoed.
        return text
    head = " ".join(text[: len(prompt)].lower().split())

    if SequenceMatcher(None, clean_prompt, head).ratio() > threshold:
        cut = max(len(clean_prompt), int(len(prompt) * 0.9))
        return text[cut:].lstrip(" \n:-")
    return text
141
+
142
+
143
def generate_inline(
    video_path: str, prompt: str, model_id: str, timeout: int
) -> str:
    """Send *prompt* plus the video at *video_path* to a Gemini model.

    The whole file is read into memory, base64-encoded and passed as an
    ``inline_data`` part with MIME type ``video/mp4``.

    Args:
        video_path: Path of the video file to send.
        prompt: Text prompt placed before the video part.
        model_id: Model name handed to ``genai.GenerativeModel``.
        timeout: Value forwarded as ``request_options["timeout"]``.

    Returns:
        The response text, or ``str(resp)`` if the response object has no
        ``text`` attribute.

    Raises:
        RuntimeError: If the response carries no text.
    """
    with open(video_path, "rb") as f:
        b64 = base64.b64encode(f.read()).decode()

    video_part = {"inline_data": {"mime_type": "video/mp4", "data": b64}}
    contents = [prompt, video_part]

    model = genai.GenerativeModel(model_name=model_id)
    resp = model.generate_content(
        contents,
        generation_config={"max_output_tokens": 1024},
        request_options={"timeout": timeout},
    )
    try:
        return resp.text
    except AttributeError:
        return str(resp)
    except ValueError as e:
        # NOTE(review): the SDK's ``text`` accessor is understood to raise
        # ValueError when no candidate text is available (e.g. a blocked
        # prompt) - confirm against the installed SDK version.
        feedback = getattr(resp, "prompt_feedback", None)
        raise RuntimeError(
            f"Gemini returned no text (response may be blocked or empty): {feedback}"
        ) from e
160
+
161
+
162
  def download_video(url: str, dst_dir: str, password: str = "") -> str:
163
  """
164
  Download a video from *url*.
165
+ 1️⃣ Direct video file → HTTP GET.
166
+ 2️⃣ Twitter status → scrape for video URL.
167
+ 3️⃣ Fallback → yt‑dlp (YouTube, archive.org, etc.).
168
+ Returns the path to an MP4 file.
169
  """
170
  video_exts = (".mp4", ".mov", ".webm", ".mkv", ".avi", ".flv")
171
+ dst_dir = Path(dst_dir)
172
 
173
  # --------------------------------------------------------------
174
+ # 1️⃣ Direct video file
175
  # --------------------------------------------------------------
176
  if url.lower().endswith(video_exts):
177
  try:
178
  r = requests.get(url, stream=True, timeout=30)
179
  r.raise_for_status()
180
  filename = sanitize_filename(url.split("/")[-1])
181
+ out_path = dst_dir / filename
182
  with open(out_path, "wb") as f:
183
  for chunk in r.iter_content(chunk_size=8192):
184
  if chunk:
185
  f.write(chunk)
186
+ return str(out_path)
187
  except Exception as e:
188
+ raise RuntimeError(f"Direct download failed: {e}") from e
189
 
190
  # --------------------------------------------------------------
191
+ # 2️⃣ Twitter status
192
  # --------------------------------------------------------------
193
  if "twitter.com" in url and "/status/" in url:
194
  try:
195
  tweet_id = url.split("/")[-1].split("?")[0]
196
  for tweet in sntwitter.TwitterTweetScraper(tweet_id).get_items():
197
+ if getattr(tweet, "media", None):
198
+ for m in tweet.media:
 
 
199
  if getattr(m, "video_url", None):
200
+ return download_video(m.video_url, str(dst_dir))
201
+ for u in getattr(tweet, "urls", []):
 
202
  if u.expandedUrl.lower().endswith(video_exts):
203
+ return download_video(u.expandedUrl, str(dst_dir))
204
  raise RuntimeError("No video found in the tweet.")
205
  except Exception as e:
206
+ raise RuntimeError(f"Twitter scrape failed: {e}") from e
207
 
208
  # --------------------------------------------------------------
209
+ # 3️⃣ yt‑dlp fallback
210
  # --------------------------------------------------------------
211
+ tmpl = str(dst_dir / "%(id)s.%(ext)s")
212
  opts = {"outtmpl": tmpl, "format": "best"}
213
  if password:
214
  opts["videopassword"] = password
 
217
  with yt_dlp.YoutubeDL(opts) as ydl:
218
  info = ydl.extract_info(url, download=True)
219
  except Exception as e:
 
220
  raise RuntimeError(
221
  f"yt‑dlp could not download the URL. "
222
  f"Common reasons: DNS failure, unsupported site, or missing video. "
223
  f"Original error: {e}"
224
+ ) from e
225
 
226
+ # If yt‑dlp gave us a predictable filename, use it
227
+ if isinstance(info, dict) and "id" in info:
228
  vid_id = info["id"]
229
  ext = info.get("ext", "mp4")
230
+ candidate = dst_dir / f"{vid_id}.{ext}"
231
+ if candidate.exists():
232
+ return convert_to_mp4(str(candidate))
 
 
 
 
 
 
 
 
 
 
 
 
233
 
234
+ # Fallback: newest file in the folder (yt‑dlp sometimes uses different naming)
235
+ if not any(dst_dir.iterdir()):
236
+ raise RuntimeError("yt‑dlp did not download any files.")
237
+ newest = max(dst_dir.iterdir(), key=lambda p: p.stat().st_mtime)
238
+ return convert_to_mp4(str(newest))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
239
 
240
 
241
  # ----------------------------------------------------------------------
242
+ # Streamlit UI
243
  # ----------------------------------------------------------------------
244
def main() -> None:
    """Render the Streamlit UI and drive download, preview and analysis.

    Layout:
      * Sidebar: video URL, a "Settings" expander (model, API key, prompt,
        video password, timeouts, compression threshold), "Load Video",
        a tweet video extractor, a preview and "Clear Video".
      * Main area: the "Generate analysis" button, the analysis result and
        an expander holding the last error.

    All state lives in ``st.session_state``; this function returns nothing.
    """
    st.set_page_config(page_title="Video Analysis Tool", layout="wide")

    # ---------- Sidebar inputs ----------
    st.sidebar.header("Video Input")
    st.sidebar.text_input("Video URL", key="url", placeholder="https://")

    with st.sidebar.expander("Settings", expanded=False):
        model = st.selectbox(
            "Model", MODEL_OPTIONS, index=MODEL_OPTIONS.index(DEFAULT_MODEL)
        )
        if model == "custom":
            model = st.text_input(
                "Custom model ID", value=DEFAULT_MODEL, key="custom_model"
            )
        st.session_state["model_input"] = model

        # API key: seed the field from Streamlit secrets when available.
        # Reading st.secrets raises when no secrets file is configured (the
        # exception class differs between Streamlit versions), so treat any
        # failure as "no secret" instead of crashing the app.
        try:
            secret_key = st.secrets.get("google_api_key")
        except Exception:
            secret_key = None
        # Only fill an empty field, so a key typed by the user is not
        # overwritten on every rerun.
        if secret_key and not st.session_state.get("api_key"):
            st.session_state["api_key"] = secret_key
        st.text_input("Google API Key", key="api_key", type="password")

        st.text_area(
            "Analysis prompt", value=DEFAULT_PROMPT, key="prompt", height=140
        )
        st.text_input(
            "Video password (if needed)", key="video_password", type="password"
        )

        st.number_input(
            "Processing timeout (s)",
            min_value=60,
            max_value=3600,
            value=st.session_state["processing_timeout"],
            step=30,
            key="processing_timeout",
        )
        st.number_input(
            "Generation timeout (s)",
            min_value=30,
            max_value=1800,
            value=st.session_state["generation_timeout"],
            step=10,
            key="generation_timeout",
        )
        st.number_input(
            "Compress if > (MB)",
            min_value=10,
            max_value=2000,
            value=st.session_state["compress_threshold_mb"],
            step=10,
            key="compress_threshold_mb",
        )

    # ---------- Load video ----------
    if st.sidebar.button("Load Video"):
        try:
            with st.spinner("Downloading video…"):
                path = download_video(
                    st.session_state["url"],
                    str(DATA_DIR),
                    st.session_state["video_password"],
                )
            st.session_state["videos"] = path
            st.session_state["last_error"] = ""
            st.success("Video loaded successfully.")
        except Exception as e:
            st.session_state["last_error"] = f"Download failed: {e}"
            st.sidebar.error(st.session_state["last_error"])

    # ---------- Twitter extractor ----------
    with st.sidebar.expander("🔎 Extract video(s) from a Tweet", expanded=False):
        tweet_url = st.text_input(
            "Tweet URL (e.g. https://twitter.com/user/status/1234567890)",
            key="tweet_url",
        )
        if st.button("Find videos in tweet"):
            if not tweet_url:
                st.error("Paste a tweet URL first.")
            else:
                try:
                    # rstrip("/") so a trailing slash does not yield an
                    # empty tweet id.
                    tweet_id = tweet_url.rstrip("/").split("/")[-1].split("?")[0]
                    video_urls: List[str] = []
                    for tweet in sntwitter.TwitterTweetScraper(tweet_id).get_items():
                        if getattr(tweet, "media", None):
                            for m in tweet.media:
                                if getattr(m, "video_url", None):
                                    video_urls.append(m.video_url)
                        for u in getattr(tweet, "urls", []):
                            if u.expandedUrl.lower().endswith(
                                (".mp4", ".mov", ".webm", ".mkv", ".avi", ".flv")
                            ):
                                video_urls.append(u.expandedUrl)

                    # dict.fromkeys dedupes while preserving order.
                    video_urls = list(dict.fromkeys(video_urls))
                    if not video_urls:
                        raise RuntimeError("No video URLs detected in this tweet.")

                    # (label, url) pairs for the selector below.
                    st.session_state["tweet_video_options"] = [
                        (f"Video {i+1} – {url.split('/')[-1][:30]}...", url)
                        for i, url in enumerate(video_urls)
                    ]
                    st.success(f"Found {len(video_urls)} video(s).")
                except Exception as e:
                    st.session_state["tweet_video_options"] = []
                    st.error(f"Tweet scrape failed: {e}")

        # Selector and download of one discovered video.
        if st.session_state.get("tweet_video_options"):
            labels, urls = zip(*st.session_state["tweet_video_options"])
            sel = st.selectbox(
                "Select video to download",
                options=range(len(labels)),
                format_func=lambda i: labels[i],
                key="tweet_video_select",
            )
            if st.button("Download selected video"):
                try:
                    with st.spinner("Downloading selected video…"):
                        path = download_video(urls[sel], str(DATA_DIR))
                    st.session_state["videos"] = path
                    st.session_state["last_error"] = ""
                    st.success("Video downloaded and loaded.")
                except Exception as e:
                    st.session_state["last_error"] = f"Download failed: {e}"
                    st.error(st.session_state["last_error"])
        else:
            st.info(
                "Paste a tweet URL and click **Find videos in tweet** to discover available videos."
            )

    # ---------- Video preview ----------
    if st.session_state["videos"]:
        try:
            mp4_path = convert_to_mp4(st.session_state["videos"])
            st.sidebar.video(str(mp4_path))
        except Exception:
            st.sidebar.write("Preview unavailable")

    if st.sidebar.button("Clear Video"):
        for f in DATA_DIR.iterdir():
            # Best-effort: entries that cannot be removed are left in place.
            try:
                f.unlink()
            except Exception:
                pass
        st.session_state.update(
            {
                "videos": "",
                "analysis_out": "",
                "last_error": "",
                "busy": False,
            }
        )
        st.success("Session cleared.")

    # ---------- Generation ----------
    col1, col2 = st.columns([1, 3])
    with col1:
        generate_now = st.button(
            "Generate analysis", type="primary", disabled=st.session_state["busy"]
        )
    with col2:
        if not st.session_state["videos"]:
            st.info("Load a video first.", icon="ℹ️")

    if generate_now and not st.session_state["busy"]:
        api_key = st.session_state["api_key"] or os.getenv("GOOGLE_API_KEY")
        if not st.session_state["videos"]:
            st.error("No video loaded.")
        elif not api_key:
            st.error("Google API key missing.")
        else:
            source_path = st.session_state["videos"]
            temp_path = None  # compressed copy to delete afterwards, if any
            st.session_state["busy"] = True
            try:
                genai.configure(api_key=api_key)
                model_id = st.session_state["model_input"]
                prompt = st.session_state["prompt"]

                # ---- optional compression ----
                with st.spinner("Checking video size…"):
                    video_path, was_compressed = maybe_compress(
                        source_path,
                        st.session_state["compress_threshold_mb"],
                    )
                # Never schedule the source video itself for deletion, even
                # if compression fell back to the original path.
                if was_compressed and video_path != source_path:
                    temp_path = video_path

                # ---- generation ----
                with st.spinner("Generating analysis…"):
                    raw_out = generate_inline(
                        video_path,
                        prompt,
                        model_id,
                        st.session_state["generation_timeout"],
                    )

                out = strip_prompt_echo(prompt, raw_out)
                st.session_state["analysis_out"] = out
                # A successful run supersedes any earlier error.
                st.session_state["last_error"] = ""
                st.success("Analysis generated successfully.")
                if not out:
                    # Non-empty output is rendered once by the results
                    # section below; only the empty case is reported here.
                    st.markdown("No output.")
            except Exception as exc:
                tb = traceback.format_exc()
                st.session_state["last_error"] = f"Generation error: {exc}"
                st.error("An error occurred during generation.")
                st.code(tb, language="text")
            finally:
                # Remove the temporary compressed file on success and on
                # failure alike.
                if temp_path:
                    try:
                        os.remove(temp_path)
                    except OSError:
                        pass
                st.session_state["busy"] = False

    # ---------- Results / errors ----------
    if st.session_state["analysis_out"]:
        st.subheader("📝 Analysis")
        st.markdown(st.session_state["analysis_out"])

    if st.session_state["last_error"]:
        with st.expander("❗️ Error details"):
            st.code(st.session_state["last_error"], language="text")
464
+
465
+
466
+ if __name__ == "__main__":
467
+ main()