MuhammadAhmadZia committed on
Commit
d6bd119
·
verified ·
1 Parent(s): 3276405

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +27 -29
app.py CHANGED
@@ -190,9 +190,6 @@ def fetch_transcript(video_id):
190
  For example, in https://www.youtube.com/watch?v=dQw4w9WgXcQ
191
  the Video ID is 'dQw4w9WgXcQ'
192
 
193
- Uses Bright Data proxy session if available (needed on HF Spaces
194
- where direct YouTube access may be blocked by DNS issues).
195
-
196
  Returns the full transcript text.
197
  """
198
  global transcript_data
@@ -202,35 +199,36 @@ def fetch_transcript(video_id):
202
 
203
  video_id = video_id.strip()
204
 
205
- try:
206
- # If Bright Data credentials are available, create a requests.Session
207
- # with proxy configured and pass it as http_client to bypass DNS issues on HF Spaces
208
- if bright_data_username and bright_data_password:
209
- import requests as req
210
- session = req.Session()
211
- proxy_url = f"http://{bright_data_username}:{bright_data_password}@brd.superproxy.io:33335"
212
- session.proxies = {"http": proxy_url, "https": proxy_url}
213
- session.verify = False
214
- api = YouTubeTranscriptApi(http_client=session)
215
- print(f"Fetching transcript via Bright Data session for video: {video_id}")
216
- else:
217
  api = YouTubeTranscriptApi()
218
- print(f"Fetching transcript directly for video: {video_id}")
219
-
220
- # Fetch the transcript for the given video ID
221
- transcript = api.fetch(video_id)
222
-
223
- # Join all transcript snippets into a single text
224
- transcript_text = " ".join([snippet.text for snippet in transcript])
225
-
226
- # Store globally for Q&A
227
- transcript_data = transcript_text
228
 
229
- return f"✅ Transcript fetched successfully! ({len(transcript_text)} characters)\n\n{transcript_text[:2000]}{'...' if len(transcript_text) > 2000 else ''}"
 
 
 
 
230
 
231
- except Exception as e:
232
- transcript_data = ""
233
- return f"❌ Error fetching transcript: {str(e)}\n\nMake sure:\n1. The Video ID is correct\n2. The video has captions/subtitles available\n3. The video is publicly accessible"
 
 
 
 
 
234
 
235
 
236
  def ask_ai_youtube(user_question, history):
 
190
  For example, in https://www.youtube.com/watch?v=dQw4w9WgXcQ
191
  the Video ID is 'dQw4w9WgXcQ'
192
 
 
 
 
193
  Returns the full transcript text.
194
  """
195
  global transcript_data
 
199
 
200
  video_id = video_id.strip()
201
 
202
+ # Retry logic for intermittent DNS issues (common on HF Spaces)
203
+ import time
204
+ max_retries = 3
205
+ last_error = None
206
+
207
+ for attempt in range(max_retries):
208
+ try:
 
 
 
 
 
209
  api = YouTubeTranscriptApi()
210
+ print(f"Fetching transcript for video: {video_id} (attempt {attempt + 1}/{max_retries})")
211
+
212
+ transcript = api.fetch(video_id)
213
+ transcript_text = " ".join([snippet.text for snippet in transcript])
214
+ transcript_data = transcript_text
215
+
216
+ return f" Transcript fetched successfully! ({len(transcript_text)} characters)\n\n{transcript_text[:2000]}{'...' if len(transcript_text) > 2000 else ''}"
 
 
 
217
 
218
+ except Exception as e:
219
+ last_error = e
220
+ print(f"Attempt {attempt + 1} failed: {str(e)}")
221
+ if attempt < max_retries - 1:
222
+ time.sleep(2) # Wait before retrying
223
 
224
+ transcript_data = ""
225
+ error_msg = str(last_error)
226
+
227
+ # Provide helpful error messages based on the error type
228
+ if "NameResolution" in error_msg or "Failed to resolve" in error_msg:
229
+ return f"❌ DNS resolution error (common on HF Spaces free tier).\n\nThis is a known intermittent issue with Hugging Face Spaces. Please try again in a few minutes.\n\nTechnical details: {error_msg}"
230
+
231
+ return f"❌ Error fetching transcript: {error_msg}\n\nMake sure:\n1. The Video ID is correct\n2. The video has captions/subtitles available\n3. The video is publicly accessible"
232
 
233
 
234
  def ask_ai_youtube(user_question, history):