Spaces:
Sleeping
Sleeping
Upload folder using huggingface_hub
Browse files
app.py
CHANGED
|
@@ -190,9 +190,6 @@ def fetch_transcript(video_id):
|
|
| 190 |
For example, in https://www.youtube.com/watch?v=dQw4w9WgXcQ
|
| 191 |
the Video ID is 'dQw4w9WgXcQ'
|
| 192 |
|
| 193 |
-
Uses Bright Data proxy session if available (needed on HF Spaces
|
| 194 |
-
where direct YouTube access may be blocked by DNS issues).
|
| 195 |
-
|
| 196 |
Returns the full transcript text.
|
| 197 |
"""
|
| 198 |
global transcript_data
|
|
@@ -202,35 +199,36 @@ def fetch_transcript(video_id):
|
|
| 202 |
|
| 203 |
video_id = video_id.strip()
|
| 204 |
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
session.proxies = {"http": proxy_url, "https": proxy_url}
|
| 213 |
-
session.verify = False
|
| 214 |
-
api = YouTubeTranscriptApi(http_client=session)
|
| 215 |
-
print(f"Fetching transcript via Bright Data session for video: {video_id}")
|
| 216 |
-
else:
|
| 217 |
api = YouTubeTranscriptApi()
|
| 218 |
-
print(f"Fetching transcript
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
# Store globally for Q&A
|
| 227 |
-
transcript_data = transcript_text
|
| 228 |
|
| 229 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 230 |
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 234 |
|
| 235 |
|
| 236 |
def ask_ai_youtube(user_question, history):
|
|
|
|
| 190 |
For example, in https://www.youtube.com/watch?v=dQw4w9WgXcQ
|
| 191 |
the Video ID is 'dQw4w9WgXcQ'
|
| 192 |
|
|
|
|
|
|
|
|
|
|
| 193 |
Returns the full transcript text.
|
| 194 |
"""
|
| 195 |
global transcript_data
|
|
|
|
| 199 |
|
| 200 |
video_id = video_id.strip()
|
| 201 |
|
| 202 |
+
# Retry logic for intermittent DNS issues (common on HF Spaces)
|
| 203 |
+
import time
|
| 204 |
+
max_retries = 3
|
| 205 |
+
last_error = None
|
| 206 |
+
|
| 207 |
+
for attempt in range(max_retries):
|
| 208 |
+
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 209 |
api = YouTubeTranscriptApi()
|
| 210 |
+
print(f"Fetching transcript for video: {video_id} (attempt {attempt + 1}/{max_retries})")
|
| 211 |
+
|
| 212 |
+
transcript = api.fetch(video_id)
|
| 213 |
+
transcript_text = " ".join([snippet.text for snippet in transcript])
|
| 214 |
+
transcript_data = transcript_text
|
| 215 |
+
|
| 216 |
+
return f"✅ Transcript fetched successfully! ({len(transcript_text)} characters)\n\n{transcript_text[:2000]}{'...' if len(transcript_text) > 2000 else ''}"
|
|
|
|
|
|
|
|
|
|
| 217 |
|
| 218 |
+
except Exception as e:
|
| 219 |
+
last_error = e
|
| 220 |
+
print(f"Attempt {attempt + 1} failed: {str(e)}")
|
| 221 |
+
if attempt < max_retries - 1:
|
| 222 |
+
time.sleep(2) # Wait before retrying
|
| 223 |
|
| 224 |
+
transcript_data = ""
|
| 225 |
+
error_msg = str(last_error)
|
| 226 |
+
|
| 227 |
+
# Provide helpful error messages based on the error type
|
| 228 |
+
if "NameResolution" in error_msg or "Failed to resolve" in error_msg:
|
| 229 |
+
return f"❌ DNS resolution error (common on HF Spaces free tier).\n\nThis is a known intermittent issue with Hugging Face Spaces. Please try again in a few minutes.\n\nTechnical details: {error_msg}"
|
| 230 |
+
|
| 231 |
+
return f"❌ Error fetching transcript: {error_msg}\n\nMake sure:\n1. The Video ID is correct\n2. The video has captions/subtitles available\n3. The video is publicly accessible"
|
| 232 |
|
| 233 |
|
| 234 |
def ask_ai_youtube(user_question, history):
|