Spaces:

MuhammadAhmadZia
/

MSDSF25M004_Ver2

Sleeping

App Files Community

MuhammadAhmadZia committed on Mar 1

Commit

d6bd119

verified ·

1 Parent(s): 3276405

Upload folder using huggingface_hub

Browse files

Files changed (1) hide show

app.py +27 -29

app.py CHANGED Viewed

@@ -190,9 +190,6 @@ def fetch_transcript(video_id):
     For example, in https://www.youtube.com/watch?v=dQw4w9WgXcQ
     the Video ID is 'dQw4w9WgXcQ'
-    Uses Bright Data proxy session if available (needed on HF Spaces
-    where direct YouTube access may be blocked by DNS issues).
     Returns the full transcript text.
     """
     global transcript_data
@@ -202,35 +199,36 @@ def fetch_transcript(video_id):
     video_id = video_id.strip()
-    try:
-        # If Bright Data credentials are available, create a requests.Session
-        # with proxy configured and pass it as http_client to bypass DNS issues on HF Spaces
-        if bright_data_username and bright_data_password:
-            import requests as req
-            session = req.Session()
-            proxy_url = f"http://{bright_data_username}:{bright_data_password}@brd.superproxy.io:33335"
-            session.proxies = {"http": proxy_url, "https": proxy_url}
-            session.verify = False
-            api = YouTubeTranscriptApi(http_client=session)
-            print(f"Fetching transcript via Bright Data session for video: {video_id}")
-        else:
             api = YouTubeTranscriptApi()
-            print(f"Fetching transcript directly for video: {video_id}")
-        # Fetch the transcript for the given video ID
-        transcript = api.fetch(video_id)
-        # Join all transcript snippets into a single text
-        transcript_text = " ".join([snippet.text for snippet in transcript])
-        # Store globally for Q&A
-        transcript_data = transcript_text
-        return f"✅ Transcript fetched successfully! ({len(transcript_text)} characters)\n\n{transcript_text[:2000]}{'...' if len(transcript_text) > 2000 else ''}"
-    except Exception as e:
-        transcript_data = ""
-        return f"❌ Error fetching transcript: {str(e)}\n\nMake sure:\n1. The Video ID is correct\n2. The video has captions/subtitles available\n3. The video is publicly accessible"
 def ask_ai_youtube(user_question, history):

     For example, in https://www.youtube.com/watch?v=dQw4w9WgXcQ
     the Video ID is 'dQw4w9WgXcQ'
     Returns the full transcript text.
     """
     global transcript_data
     video_id = video_id.strip()
+    # Retry logic for intermittent DNS issues (common on HF Spaces)
+    import time
+    max_retries = 3
+    last_error = None
+    for attempt in range(max_retries):
+        try:
             api = YouTubeTranscriptApi()
+            print(f"Fetching transcript for video: {video_id} (attempt {attempt + 1}/{max_retries})")
+            transcript = api.fetch(video_id)
+            transcript_text = " ".join([snippet.text for snippet in transcript])
+            transcript_data = transcript_text
+            return f"✅ Transcript fetched successfully! ({len(transcript_text)} characters)\n\n{transcript_text[:2000]}{'...' if len(transcript_text) > 2000 else ''}"
+        except Exception as e:
+            last_error = e
+            print(f"Attempt {attempt + 1} failed: {str(e)}")
+            if attempt < max_retries - 1:
+                time.sleep(2)  # Wait before retrying
+    transcript_data = ""
+    error_msg = str(last_error)
+    # Provide helpful error messages based on the error type
+    if "NameResolution" in error_msg or "Failed to resolve" in error_msg:
+        return f"❌ DNS resolution error (common on HF Spaces free tier).\n\nThis is a known intermittent issue with Hugging Face Spaces. Please try again in a few minutes.\n\nTechnical details: {error_msg}"
+    return f"❌ Error fetching transcript: {error_msg}\n\nMake sure:\n1. The Video ID is correct\n2. The video has captions/subtitles available\n3. The video is publicly accessible"
 def ask_ai_youtube(user_question, history):