Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -104,12 +104,32 @@ def download_audio(url, cookies_file_path=None):
|
|
| 104 |
'outtmpl': output_path + '.%(ext)s',
|
| 105 |
'quiet': True,
|
| 106 |
'no_warnings': True,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
}
|
| 108 |
|
| 109 |
# Add cookies file if provided
|
| 110 |
if cookies_file_path and os.path.exists(cookies_file_path):
|
| 111 |
ydl_opts['cookiefile'] = cookies_file_path
|
| 112 |
print(f"Using cookies file: {cookies_file_path}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 113 |
|
| 114 |
with YoutubeDL(ydl_opts) as ydl:
|
| 115 |
info_dict = ydl.extract_info(url, download=True)
|
|
@@ -124,7 +144,10 @@ def download_audio(url, cookies_file_path=None):
|
|
| 124 |
raise FileNotFoundError(f"Downloaded audio file not found")
|
| 125 |
|
| 126 |
except Exception as e:
|
| 127 |
-
|
|
|
|
|
|
|
|
|
|
| 128 |
|
| 129 |
def transcribe_audio(file_path):
|
| 130 |
"""Transcribe audio file using Whisper"""
|
|
@@ -352,11 +375,17 @@ with gr.Blocks(
|
|
| 352 |
)
|
| 353 |
|
| 354 |
gr.Markdown("""
|
| 355 |
-
**How to get cookies.txt:**
|
| 356 |
-
1. Install browser extension
|
| 357 |
-
2. Visit YouTube in your browser (logged in)
|
| 358 |
-
3.
|
| 359 |
4. Upload the downloaded cookies.txt file here
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 360 |
""")
|
| 361 |
|
| 362 |
url_input = gr.Textbox(
|
|
@@ -380,10 +409,11 @@ with gr.Blocks(
|
|
| 380 |
|
| 381 |
gr.Markdown("""
|
| 382 |
### 💡 Tips:
|
| 383 |
-
-
|
| 384 |
- Works best with financial YouTube channels
|
| 385 |
- Ensure video has clear audio
|
| 386 |
- English content works best
|
|
|
|
| 387 |
""")
|
| 388 |
|
| 389 |
with gr.Row():
|
|
|
|
| 104 |
'outtmpl': output_path + '.%(ext)s',
|
| 105 |
'quiet': True,
|
| 106 |
'no_warnings': True,
|
| 107 |
+
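# Reduce bot-detection blocks: browser-like User-Agent/Referer plus retries with exponential backoff.
# NOTE(review): yt-dlp's Python API takes custom request headers via 'http_headers'; confirm 'user_agent'/'referer' are honored as top-level options.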
|
| 108 |
+
'user_agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
|
| 109 |
+
'referer': 'https://www.youtube.com/',
|
| 110 |
+
'extractor_retries': 3,
|
| 111 |
+
'fragment_retries': 3,
|
| 112 |
+
'retry_sleep_functions': {'http': lambda n: 2 ** n},
|
| 113 |
}
|
| 114 |
|
| 115 |
# Add cookies file if provided
|
| 116 |
if cookies_file_path and os.path.exists(cookies_file_path):
|
| 117 |
ydl_opts['cookiefile'] = cookies_file_path
|
| 118 |
print(f"Using cookies file: {cookies_file_path}")
|
| 119 |
+
else:
|
| 120 |
+
print("No cookies file provided - may encounter bot detection")
|
| 121 |
+
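# No cookies file available: fall back to sending browser-like request headers.
# NOTE(review): yt-dlp documents this option as 'http_headers'; verify the 'headers' key below is not silently ignored.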
|
| 122 |
+
ydl_opts.update({
|
| 123 |
+
'headers': {
|
| 124 |
+
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
|
| 125 |
+
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
|
| 126 |
+
'Accept-Language': 'en-us,en;q=0.5',
|
| 127 |
+
'Accept-Encoding': 'gzip,deflate',
|
| 128 |
+
'DNT': '1',
|
| 129 |
+
'Connection': 'keep-alive',
|
| 130 |
+
'Upgrade-Insecure-Requests': '1',
|
| 131 |
+
}
|
| 132 |
+
})
|
| 133 |
|
| 134 |
with YoutubeDL(ydl_opts) as ydl:
|
| 135 |
info_dict = ydl.extract_info(url, download=True)
|
|
|
|
| 144 |
raise FileNotFoundError(f"Downloaded audio file not found")
|
| 145 |
|
| 146 |
except Exception as e:
|
| 147 |
+
if "403" in str(e) or "Forbidden" in str(e):
|
| 148 |
+
raise Exception(f"YouTube blocked the request (403 Forbidden). Please upload your cookies.txt file to bypass bot detection. Original error: {str(e)}")
|
| 149 |
+
else:
|
| 150 |
+
raise Exception(f"Failed to download audio: {str(e)}")
|
| 151 |
|
| 152 |
def transcribe_audio(file_path):
|
| 153 |
"""Transcribe audio file using Whisper"""
|
|
|
|
| 375 |
)
|
| 376 |
|
| 377 |
gr.Markdown("""
|
| 378 |
+
**How to get cookies.txt to fix 403 Forbidden errors:**
|
| 379 |
+
1. Install browser extension: "Get cookies.txt LOCALLY"
|
| 380 |
+
2. Visit YouTube in your browser (while logged in)
|
| 381 |
+
3. Click the extension icon and export cookies for youtube.com
|
| 382 |
4. Upload the downloaded cookies.txt file here
|
| 383 |
+
|
| 384 |
+
**Alternative extensions:**
|
| 385 |
+
- "cookies.txt" (Chrome/Firefox)
|
| 386 |
+
- "Export Cookies" (Chrome)
|
| 387 |
+
|
| 388 |
+
⚠️ **Important**: Without cookies, you'll likely get 403 Forbidden errors
|
| 389 |
""")
|
| 390 |
|
| 391 |
url_input = gr.Textbox(
|
|
|
|
| 409 |
|
| 410 |
gr.Markdown("""
|
| 411 |
### 💡 Tips:
|
| 412 |
+
- **MUST upload cookies.txt** to avoid 403 Forbidden errors
|
| 413 |
- Works best with financial YouTube channels
|
| 414 |
- Ensure video has clear audio
|
| 415 |
- English content works best
|
| 416 |
+
- Try shorter videos first (under 10 minutes)
|
| 417 |
""")
|
| 418 |
|
| 419 |
with gr.Row():
|