Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -183,16 +183,26 @@ def get_video_id(url):
|
|
| 183 |
def fetch_youtube_transcript(video_id):
|
| 184 |
try:
|
| 185 |
transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 192 |
for transcript in transcript_list:
|
| 193 |
if transcript.is_translatable:
|
| 194 |
translated_transcript = transcript.translate('en').fetch()
|
| 195 |
return " ".join([item['text'] for item in translated_transcript])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 196 |
return None
|
| 197 |
|
| 198 |
# Function to extract subtitles using yt-dlp with cookies
|
|
@@ -205,10 +215,17 @@ def extract_subtitles_with_ytdlp(video_url):
|
|
| 205 |
'subtitlesformat': 'vtt',
|
| 206 |
'outtmpl': 'subtitle.%(ext)s',
|
| 207 |
'http_headers': {
|
| 208 |
-
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/
|
| 209 |
-
'Accept
|
|
|
|
|
|
|
|
|
|
| 210 |
},
|
| 211 |
'cookiefile': 'cookies.txt',
|
|
|
|
|
|
|
|
|
|
|
|
|
| 212 |
}
|
| 213 |
try:
|
| 214 |
if not os.path.exists('cookies.txt'):
|
|
@@ -216,9 +233,10 @@ def extract_subtitles_with_ytdlp(video_url):
|
|
| 216 |
"cookies.txt file not found. Please upload a valid cookies.txt file to the root directory of your Space. "
|
| 217 |
"To generate it:\n"
|
| 218 |
"1. Open Chrome and log in to YouTube.\n"
|
| 219 |
-
"2. Install the 'Export Cookies' extension.\n"
|
| 220 |
"3. Export cookies for 'youtube.com' and save as 'cookies.txt'.\n"
|
| 221 |
-
"4. Upload the file to your Space via the Files tab
|
|
|
|
| 222 |
)
|
| 223 |
return None
|
| 224 |
|
|
@@ -227,11 +245,9 @@ def extract_subtitles_with_ytdlp(video_url):
|
|
| 227 |
available_subs = info.get('subtitles', {})
|
| 228 |
auto_subs = info.get('automatic_captions', {})
|
| 229 |
|
| 230 |
-
st.text(f"Available subtitles: {list(available_subs.keys())}")
|
| 231 |
-
st.text(f"Available auto-captions: {list(auto_subs.keys())}")
|
| 232 |
-
|
| 233 |
subtitle_langs = list(available_subs.keys()) or list(auto_subs.keys())
|
| 234 |
if not subtitle_langs:
|
|
|
|
| 235 |
return None
|
| 236 |
|
| 237 |
ydl.params['subtitleslangs'] = subtitle_langs
|
|
@@ -245,6 +261,7 @@ def extract_subtitles_with_ytdlp(video_url):
|
|
| 245 |
break
|
| 246 |
|
| 247 |
if not subtitle_file:
|
|
|
|
| 248 |
return None
|
| 249 |
|
| 250 |
with open(subtitle_file, 'r', encoding='utf-8') as f:
|
|
|
|
| 183 |
def fetch_youtube_transcript(video_id):
|
| 184 |
try:
|
| 185 |
transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
|
| 186 |
+
# Try English variants first
|
| 187 |
+
for lang in ['en', 'en-US', 'en-GB']:
|
| 188 |
+
try:
|
| 189 |
+
transcript = transcript_list.find_transcript([lang]).fetch()
|
| 190 |
+
full_text = " ".join([item['text'] for item in transcript])
|
| 191 |
+
return full_text
|
| 192 |
+
except NoTranscriptFound:
|
| 193 |
+
continue
|
| 194 |
+
|
| 195 |
+
# If no English transcript, try any available transcript and translate to English
|
| 196 |
for transcript in transcript_list:
|
| 197 |
if transcript.is_translatable:
|
| 198 |
translated_transcript = transcript.translate('en').fetch()
|
| 199 |
return " ".join([item['text'] for item in translated_transcript])
|
| 200 |
+
|
| 201 |
+
return None
|
| 202 |
+
except TranscriptsDisabled:
|
| 203 |
+
return None
|
| 204 |
+
except Exception as e:
|
| 205 |
+
st.error(f"Error fetching transcript with youtube-transcript-api: {str(e)}")
|
| 206 |
return None
|
| 207 |
|
| 208 |
# Function to extract subtitles using yt-dlp with cookies
|
|
|
|
| 215 |
'subtitlesformat': 'vtt',
|
| 216 |
'outtmpl': 'subtitle.%(ext)s',
|
| 217 |
'http_headers': {
|
| 218 |
+
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36',
|
| 219 |
+
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
|
| 220 |
+
'Accept-Language': 'en-US,en;q=0.5',
|
| 221 |
+
'Referer': 'https://www.youtube.com/',
|
| 222 |
+
'Origin': 'https://www.youtube.com',
|
| 223 |
},
|
| 224 |
'cookiefile': 'cookies.txt',
|
| 225 |
+
'retries': 5, # Increased retries
|
| 226 |
+
'retry_sleep': 3,
|
| 227 |
+
'geo_bypass': True, # Attempt to bypass geo-restrictions
|
| 228 |
+
'no_check_certificate': True, # Bypass certificate issues
|
| 229 |
}
|
| 230 |
try:
|
| 231 |
if not os.path.exists('cookies.txt'):
|
|
|
|
| 233 |
"cookies.txt file not found. Please upload a valid cookies.txt file to the root directory of your Space. "
|
| 234 |
"To generate it:\n"
|
| 235 |
"1. Open Chrome and log in to YouTube.\n"
|
| 236 |
+
"2. Install the 'Export Cookies' extension (or use a tool like 'cookies.txt' for Firefox).\n"
|
| 237 |
"3. Export cookies for 'youtube.com' and save as 'cookies.txt'.\n"
|
| 238 |
+
"4. Upload the file to your Space via the Files tab.\n"
|
| 239 |
+
"Alternative: If this fails, test locally to rule out Spaces IP restrictions."
|
| 240 |
)
|
| 241 |
return None
|
| 242 |
|
|
|
|
| 245 |
available_subs = info.get('subtitles', {})
|
| 246 |
auto_subs = info.get('automatic_captions', {})
|
| 247 |
|
|
|
|
|
|
|
|
|
|
| 248 |
subtitle_langs = list(available_subs.keys()) or list(auto_subs.keys())
|
| 249 |
if not subtitle_langs:
|
| 250 |
+
st.warning("No subtitles or auto-captions available in any language.")
|
| 251 |
return None
|
| 252 |
|
| 253 |
ydl.params['subtitleslangs'] = subtitle_langs
|
|
|
|
| 261 |
break
|
| 262 |
|
| 263 |
if not subtitle_file:
|
| 264 |
+
st.warning("No subtitle files were downloaded.")
|
| 265 |
return None
|
| 266 |
|
| 267 |
with open(subtitle_file, 'r', encoding='utf-8') as f:
|