Hug0endob committed on
Commit
9775738
·
verified ·
1 Parent(s): bf89690

Update streamlit_app.py

Browse files
Files changed (1) hide show
  1. streamlit_app.py +84 -3
streamlit_app.py CHANGED
@@ -1,4 +1,6 @@
1
  # streamlit_app.py
 
 
2
  import base64
3
  import hashlib
4
  import os
@@ -93,14 +95,72 @@ def compress_video(inp: str, out: str, crf: int = 28, preset: str = "fast") -> s
93
 
94
 
95
  def download_video(url: str, dst_dir: str, password: str = "") -> str:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96
  tmpl = str(Path(dst_dir) / "%(id)s.%(ext)s")
97
  opts = {"outtmpl": tmpl, "format": "best"}
98
  if password:
99
  opts["videopassword"] = password
100
- with yt_dlp.YoutubeDL(opts) as ydl:
101
- info = ydl.extract_info(url, download=True)
102
 
103
- # Try to locate the downloaded file
 
 
 
 
 
 
 
 
 
 
 
104
  if isinstance(info, dict) and info.get("id"):
105
  vid_id = info["id"]
106
  ext = info.get("ext", "mp4")
@@ -214,6 +274,27 @@ if st.sidebar.button("Load Video"):
214
  st.session_state["last_error"] = f"Download failed: {e}"
215
  st.sidebar.error(st.session_state["last_error"])
216
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
217
  if st.session_state["videos"]:
218
  try:
219
  mp4 = convert_to_mp4(st.session_state["videos"])
 
1
  # streamlit_app.py
2
+ import requests # for direct‑video download
3
+ import snscrape.modules.twitter as sntwitter # Twitter scraper
4
  import base64
5
  import hashlib
6
  import os
 
95
 
96
 
97
  def download_video(url: str, dst_dir: str, password: str = "") -> str:
98
+ """
99
+ Download a video from *url*.
100
+ - If the URL ends with a known video extension → plain HTTP download.
101
+ - Any other URL (YouTube, archive.org, etc.) → yt‑dlp, tried last; failures are re-raised as RuntimeError with the original error appended.
102
+ - If it is a Twitter status URL → scrape the tweet for the first video URL.
103
+ """
104
+ video_exts = (".mp4", ".mov", ".webm", ".mkv", ".avi", ".flv")
105
+
106
+ # --------------------------------------------------------------
107
+ # 1️⃣ Direct video file (no yt‑dlp needed)
108
+ # --------------------------------------------------------------
109
+ if url.lower().endswith(video_exts):
110
+ try:
111
+ r = requests.get(url, stream=True, timeout=30)
112
+ r.raise_for_status()
113
+ filename = sanitize_filename(url.split("/")[-1])
114
+ out_path = Path(dst_dir) / filename
115
+ with open(out_path, "wb") as f:
116
+ for chunk in r.iter_content(chunk_size=8192):
117
+ if chunk:
118
+ f.write(chunk)
119
+ return str(out_path) # saved as-is; may be any extension in video_exts, not necessarily MP4
120
+ except Exception as e:
121
+ raise RuntimeError(f"Direct download failed: {e}")
122
+
123
+ # --------------------------------------------------------------
124
+ # 2️⃣ Twitter status – scrape for video URL
125
+ # --------------------------------------------------------------
126
+ if "twitter.com" in url and "/status/" in url:
127
+ try:
128
+ tweet_id = url.split("/")[-1].split("?")[0]
129
+ for tweet in sntwitter.TwitterTweetScraper(tweet_id).get_items():
130
+ # Look for media URLs in the tweet JSON
131
+ media = tweet.media
132
+ if media:
133
+ for m in media:
134
+ if getattr(m, "video_url", None):
135
+ return download_video(m.video_url, dst_dir) # recurse as a direct URL
136
+ # If no media field, fall back to expanded URLs
137
+ for u in tweet.urls:
138
+ if u.expandedUrl.lower().endswith(video_exts):
139
+ return download_video(u.expandedUrl, dst_dir)
140
+ raise RuntimeError("No video found in the tweet.")
141
+ except Exception as e:
142
+ raise RuntimeError(f"Twitter scrape failed: {e}")
143
+
144
+ # --------------------------------------------------------------
145
+ # 3️⃣ Fallback to yt‑dlp (YouTube, archive.org, etc.)
146
+ # --------------------------------------------------------------
147
  tmpl = str(Path(dst_dir) / "%(id)s.%(ext)s")
148
  opts = {"outtmpl": tmpl, "format": "best"}
149
  if password:
150
  opts["videopassword"] = password
 
 
151
 
152
+ try:
153
+ with yt_dlp.YoutubeDL(opts) as ydl:
154
+ info = ydl.extract_info(url, download=True)
155
+ except Exception as e:
156
+ # Re‑raise with a short, user‑friendly message
157
+ raise RuntimeError(
158
+ f"yt‑dlp could not download the URL. "
159
+ f"Common reasons: DNS failure, unsupported site, or missing video. "
160
+ f"Original error: {e}"
161
+ )
162
+
163
+ # Locate the downloaded file (same logic as before)
164
  if isinstance(info, dict) and info.get("id"):
165
  vid_id = info["id"]
166
  ext = info.get("ext", "mp4")
 
274
  st.session_state["last_error"] = f"Download failed: {e}"
275
  st.sidebar.error(st.session_state["last_error"])
276
 
277
+ st.sidebar.markdown("---")
278
+ st.sidebar.subheader("🔎 Extract video from a Tweet")
279
+ tweet_url = st.sidebar.text_input(
280
+ "Tweet URL (e.g. https://twitter.com/user/status/1234567890)",
281
+ key="tweet_url",
282
+ )
283
+
284
+ if st.sidebar.button("Fetch video from tweet"):
285
+ if not tweet_url:
286
+ st.sidebar.error("Please paste a tweet URL.")
287
+ else:
288
+ try:
289
+ # The same download_video function will recognise the tweet URL
290
+ path = download_video(tweet_url, str(DATA_DIR))
291
+ st.session_state["videos"] = path
292
+ st.session_state["last_error"] = ""
293
+ st.sidebar.success("Video extracted from tweet and loaded.")
294
+ except Exception as e:
295
+ st.session_state["last_error"] = f"Tweet extraction failed: {e}"
296
+ st.sidebar.error(st.session_state["last_error"])
297
+
298
  if st.session_state["videos"]:
299
  try:
300
  mp4 = convert_to_mp4(st.session_state["videos"])