MahatirTusher committed on
Commit
e82afb7
·
verified ·
1 Parent(s): 3f006bc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -13
app.py CHANGED
@@ -183,16 +183,26 @@ def get_video_id(url):
183
  def fetch_youtube_transcript(video_id):
184
  try:
185
  transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
186
- transcript = transcript_list.find_transcript(['en']).fetch()
187
- full_text = " ".join([item['text'] for item in transcript])
188
- return full_text
189
- except TranscriptsDisabled:
190
- return None
191
- except NoTranscriptFound:
 
 
 
 
192
  for transcript in transcript_list:
193
  if transcript.is_translatable:
194
  translated_transcript = transcript.translate('en').fetch()
195
  return " ".join([item['text'] for item in translated_transcript])
 
 
 
 
 
 
196
  return None
197
 
198
  # Function to extract subtitles using yt-dlp with cookies
@@ -205,10 +215,17 @@ def extract_subtitles_with_ytdlp(video_url):
205
  'subtitlesformat': 'vtt',
206
  'outtmpl': 'subtitle.%(ext)s',
207
  'http_headers': {
208
- 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
209
- 'Accept-Language': 'en-US,en;q=0.9',
 
 
 
210
  },
211
  'cookiefile': 'cookies.txt',
 
 
 
 
212
  }
213
  try:
214
  if not os.path.exists('cookies.txt'):
@@ -216,9 +233,10 @@ def extract_subtitles_with_ytdlp(video_url):
216
  "cookies.txt file not found. Please upload a valid cookies.txt file to the root directory of your Space. "
217
  "To generate it:\n"
218
  "1. Open Chrome and log in to YouTube.\n"
219
- "2. Install the 'Export Cookies' extension.\n"
220
  "3. Export cookies for 'youtube.com' and save as 'cookies.txt'.\n"
221
- "4. Upload the file to your Space via the Files tab."
 
222
  )
223
  return None
224
 
@@ -227,11 +245,9 @@ def extract_subtitles_with_ytdlp(video_url):
227
  available_subs = info.get('subtitles', {})
228
  auto_subs = info.get('automatic_captions', {})
229
 
230
- st.text(f"Available subtitles: {list(available_subs.keys())}")
231
- st.text(f"Available auto-captions: {list(auto_subs.keys())}")
232
-
233
  subtitle_langs = list(available_subs.keys()) or list(auto_subs.keys())
234
  if not subtitle_langs:
 
235
  return None
236
 
237
  ydl.params['subtitleslangs'] = subtitle_langs
@@ -245,6 +261,7 @@ def extract_subtitles_with_ytdlp(video_url):
245
  break
246
 
247
  if not subtitle_file:
 
248
  return None
249
 
250
  with open(subtitle_file, 'r', encoding='utf-8') as f:
 
183
  def fetch_youtube_transcript(video_id):
184
  try:
185
  transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
186
+ # Try English variants first
187
+ for lang in ['en', 'en-US', 'en-GB']:
188
+ try:
189
+ transcript = transcript_list.find_transcript([lang]).fetch()
190
+ full_text = " ".join([item['text'] for item in transcript])
191
+ return full_text
192
+ except NoTranscriptFound:
193
+ continue
194
+
195
+ # If no English transcript, try any available transcript and translate to English
196
  for transcript in transcript_list:
197
  if transcript.is_translatable:
198
  translated_transcript = transcript.translate('en').fetch()
199
  return " ".join([item['text'] for item in translated_transcript])
200
+
201
+ return None
202
+ except TranscriptsDisabled:
203
+ return None
204
+ except Exception as e:
205
+ st.error(f"Error fetching transcript with youtube-transcript-api: {str(e)}")
206
  return None
207
 
208
  # Function to extract subtitles using yt-dlp with cookies
 
215
  'subtitlesformat': 'vtt',
216
  'outtmpl': 'subtitle.%(ext)s',
217
  'http_headers': {
218
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36',
219
+ 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
220
+ 'Accept-Language': 'en-US,en;q=0.5',
221
+ 'Referer': 'https://www.youtube.com/',
222
+ 'Origin': 'https://www.youtube.com',
223
  },
224
  'cookiefile': 'cookies.txt',
225
+ 'retries': 5, # Increased retries
226
+ 'retry_sleep': 3,
227
+ 'geo_bypass': True, # Attempt to bypass geo-restrictions
228
+ 'no_check_certificate': True, # Bypass certificate issues
229
  }
230
  try:
231
  if not os.path.exists('cookies.txt'):
 
233
  "cookies.txt file not found. Please upload a valid cookies.txt file to the root directory of your Space. "
234
  "To generate it:\n"
235
  "1. Open Chrome and log in to YouTube.\n"
236
+ "2. Install the 'Export Cookies' extension (or use a tool like 'cookies.txt' for Firefox).\n"
237
  "3. Export cookies for 'youtube.com' and save as 'cookies.txt'.\n"
238
+ "4. Upload the file to your Space via the Files tab.\n"
239
+ "Alternative: If this fails, test locally to rule out Spaces IP restrictions."
240
  )
241
  return None
242
 
 
245
  available_subs = info.get('subtitles', {})
246
  auto_subs = info.get('automatic_captions', {})
247
 
 
 
 
248
  subtitle_langs = list(available_subs.keys()) or list(auto_subs.keys())
249
  if not subtitle_langs:
250
+ st.warning("No subtitles or auto-captions available in any language.")
251
  return None
252
 
253
  ydl.params['subtitleslangs'] = subtitle_langs
 
261
  break
262
 
263
  if not subtitle_file:
264
+ st.warning("No subtitle files were downloaded.")
265
  return None
266
 
267
  with open(subtitle_file, 'r', encoding='utf-8') as f: