Spaces:
Running on Zero
Running on Zero
Commit ·
bef4025
1
Parent(s): 07b891e
Update YouTube video router and chunker
Browse files
app.py
CHANGED
|
@@ -214,7 +214,7 @@ def _return_yt_html_embed(yt_url):
|
|
| 214 |
)
|
| 215 |
return HTML_str
|
| 216 |
|
| 217 |
-
def download_yt_audio(yt_url, filename):
|
| 218 |
info_loader = youtube_dl.YoutubeDL()
|
| 219 |
|
| 220 |
try:
|
|
@@ -237,21 +237,41 @@ def download_yt_audio(yt_url, filename):
|
|
| 237 |
file_length_hms = time.strftime("%HH:%MM:%SS", time.gmtime(file_length_s))
|
| 238 |
raise gr.Error(f"Maximum YouTube length is {yt_length_limit_hms}, got {file_length_hms} YouTube video.")
|
| 239 |
|
| 240 |
-
ydl_opts = {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 241 |
|
| 242 |
-
|
| 243 |
-
|
| 244 |
-
|
| 245 |
-
|
| 246 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 247 |
|
| 248 |
@spaces.GPU(duration=120)
|
| 249 |
-
def yt_transcribe(yt_url, task, summarize=False, max_filesize=75.0):
|
| 250 |
html_embed_str = _return_yt_html_embed(yt_url)
|
| 251 |
|
| 252 |
with tempfile.TemporaryDirectory() as tmpdirname:
|
| 253 |
filepath = os.path.join(tmpdirname, "video.mp4")
|
| 254 |
-
|
|
|
|
|
|
|
|
|
|
| 255 |
with open(filepath, "rb") as f:
|
| 256 |
inputs = f.read()
|
| 257 |
|
|
@@ -329,6 +349,7 @@ yt_transcribe = gr.Interface(
|
|
| 329 |
gr.Textbox(lines=1, placeholder="Paste the URL to a YouTube video here", label="YouTube URL"),
|
| 330 |
gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
|
| 331 |
gr.Checkbox(label="Summarize with Gemini", value=False),
|
|
|
|
| 332 |
],
|
| 333 |
outputs=[
|
| 334 |
"html",
|
|
|
|
| 214 |
)
|
| 215 |
return HTML_str
|
| 216 |
|
| 217 |
+
def download_yt_audio(yt_url, filename, cookies_txt: str | None = None):
|
| 218 |
info_loader = youtube_dl.YoutubeDL()
|
| 219 |
|
| 220 |
try:
|
|
|
|
| 237 |
file_length_hms = time.strftime("%HH:%MM:%SS", time.gmtime(file_length_s))
|
| 238 |
raise gr.Error(f"Maximum YouTube length is {yt_length_limit_hms}, got {file_length_hms} YouTube video.")
|
| 239 |
|
| 240 |
+
ydl_opts = {
|
| 241 |
+
"outtmpl": filename,
|
| 242 |
+
"format": "bestaudio/best",
|
| 243 |
+
"quiet": True,
|
| 244 |
+
"noplaylist": True,
|
| 245 |
+
"retries": 3,
|
| 246 |
+
}
|
| 247 |
+
cookie_path = None
|
| 248 |
+
if cookies_txt and cookies_txt.strip():
|
| 249 |
+
tf = tempfile.NamedTemporaryFile(mode="w", suffix=".txt", delete=False)
|
| 250 |
+
tf.write(cookies_txt)
|
| 251 |
+
tf.close()
|
| 252 |
+
cookie_path = tf.name
|
| 253 |
+
ydl_opts["cookiefile"] = cookie_path
|
| 254 |
|
| 255 |
+
try:
|
| 256 |
+
with youtube_dl.YoutubeDL(ydl_opts) as ydl:
|
| 257 |
+
try:
|
| 258 |
+
ydl.download([yt_url])
|
| 259 |
+
except youtube_dl.utils.ExtractorError as err:
|
| 260 |
+
raise gr.Error(str(err))
|
| 261 |
+
finally:
|
| 262 |
+
if cookie_path and os.path.exists(cookie_path):
|
| 263 |
+
os.unlink(cookie_path)
|
| 264 |
|
| 265 |
@spaces.GPU(duration=120)
|
| 266 |
+
def yt_transcribe(yt_url, task, summarize=False, cookies_txt=None, max_filesize=75.0):
|
| 267 |
html_embed_str = _return_yt_html_embed(yt_url)
|
| 268 |
|
| 269 |
with tempfile.TemporaryDirectory() as tmpdirname:
|
| 270 |
filepath = os.path.join(tmpdirname, "video.mp4")
|
| 271 |
+
try:
|
| 272 |
+
download_yt_audio(yt_url, filepath, cookies_txt=cookies_txt)
|
| 273 |
+
except gr.Error as e:
|
| 274 |
+
raise gr.Error(str(e) + "\n\nTip: Provide exported YouTube cookies (Netscape format) in the optional cookies box if the video requires sign-in or captcha.")
|
| 275 |
with open(filepath, "rb") as f:
|
| 276 |
inputs = f.read()
|
| 277 |
|
|
|
|
| 349 |
gr.Textbox(lines=1, placeholder="Paste the URL to a YouTube video here", label="YouTube URL"),
|
| 350 |
gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
|
| 351 |
gr.Checkbox(label="Summarize with Gemini", value=False),
|
| 352 |
+
gr.Textbox(lines=4, placeholder="Optional: paste exported YouTube cookies in Netscape format here if the video requires sign-in.", label="YouTube cookies (optional)"),
|
| 353 |
],
|
| 354 |
outputs=[
|
| 355 |
"html",
|