Naveen-2007 committed on
Commit
8e5db5f
·
1 Parent(s): b47dcd2

Add web search fallback for Video Brain when YouTube transcript fails due to network issues

Browse files
Files changed (2) hide show
  1. app/api.py +93 -32
  2. tools/youtube_tool.py +37 -10
app/api.py CHANGED
@@ -1197,7 +1197,7 @@ Be detailed, practical, and use real-world best practices. Make it production-re
1197
 
1198
 
1199
  # =======================================================
1200
- # VIDEO BRAIN ENDPOINT - YouTube Video Analysis with Transcript
1201
  # =======================================================
1202
  from tools.youtube_tool import YouTubeTool
1203
  youtube_tool = YouTubeTool()
@@ -1215,8 +1215,8 @@ class VideoBrainRequest(BaseModel):
1215
  @app.post("/api/video_brain", response_model=ChatResponse)
1216
  def video_brain_mode(req: VideoBrainRequest):
1217
  """
1218
- Video Brain Mode - Analyzes YouTube videos using actual transcripts.
1219
- Extracts real transcript and provides accurate responses.
1220
  """
1221
  q = req.message.strip()
1222
  ws = req.workspace_id
@@ -1237,35 +1237,73 @@ def video_brain_mode(req: VideoBrainRequest):
1237
  workspace_id=ws
1238
  )
1239
 
1240
- # Check if we already have transcript for this video
1241
  video_id = youtube_tool.extract_video_id(youtube_url)
1242
  cache_key = f"{ws}_{video_id}"
1243
 
 
 
1244
  transcript_data = None
 
1245
  if cache_key in video_transcripts:
1246
  transcript_data = video_transcripts[cache_key]
1247
- print(f" πŸ“‹ Using cached transcript for {video_id}")
1248
- else:
1249
- # Fetch new transcript
 
 
 
1250
  print(f" πŸ”„ Fetching transcript for video: {video_id}")
1251
  transcript_data = youtube_tool.get_transcript(youtube_url)
1252
- if transcript_data["success"]:
 
1253
  video_transcripts[cache_key] = transcript_data
1254
- print(f" βœ… Transcript fetched: {len(transcript_data.get('transcript', ''))} chars")
 
1255
  else:
1256
- print(f" ⚠️ Transcript fetch failed: {transcript_data.get('error')}")
1257
 
1258
- # Build context for LLM
1259
- transcript_text = ""
1260
- if transcript_data and transcript_data.get("success"):
1261
- # Use clean transcript for context (with timestamps)
1262
- transcript_text = transcript_data.get("transcript", "")[:8000] # Limit for context window
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1263
 
1264
- # Generate appropriate prompt based on question type
1265
  q_lower = q.lower()
1266
  is_summary = any(word in q_lower for word in ["summarize", "summary", "overview", "main points", "key takeaways", "what is this about"])
1267
 
1268
  if transcript_text:
 
1269
  if is_summary:
1270
  prompt = f"""You are VIDEO BRAIN AI - an expert at analyzing YouTube videos.
1271
 
@@ -1293,22 +1331,43 @@ Answer the question using ONLY the information from the transcript above.
1293
  - Be specific and cite timestamps when relevant
1294
  - If the answer is not in the transcript, say so honestly
1295
  - Format your response clearly with bullet points if appropriate"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1296
  else:
1297
- # No transcript available - explain why
1298
  error_msg = transcript_data.get("error", "Unknown error") if transcript_data else "Could not fetch transcript"
1299
- prompt = f"""The user asked about a YouTube video but I couldn't extract the transcript.
1300
 
1301
  Video URL: {youtube_url}
1302
  Error: {error_msg}
1303
  User Question: {q}
1304
 
1305
- Explain that:
1306
- 1. The transcript couldn't be fetched (reason: {error_msg})
1307
- 2. Suggest they try:
1308
- - A different video that has captions/subtitles enabled
1309
- - Checking if the video is public and available
1310
- - Using YouTube's built-in transcript feature (click ... > Show transcript)
1311
- 3. Offer to help once they can provide the transcript text manually"""
1312
 
1313
  try:
1314
  msgs = build_context(ws, prompt)
@@ -1317,8 +1376,8 @@ Explain that:
1317
  print(f" ❌ LLM error: {e}")
1318
  answer = f"Error generating response: {str(e)[:100]}"
1319
 
1320
- # Generate contextual follow-up questions
1321
- if transcript_text:
1322
  follow = [
1323
  "What are the main arguments or points made?",
1324
  "Summarize this in 3 bullet points",
@@ -1328,14 +1387,16 @@ Explain that:
1328
  ]
1329
  else:
1330
  follow = [
 
1331
  "Try a different YouTube video",
1332
- "How do I enable captions on YouTube?",
1333
- "What videos work best with Video Brain?"
1334
  ]
1335
 
1336
- # Build sources
1337
- sources = [{"title": f"πŸŽ₯ YouTube Video", "url": youtube_url}]
1338
- links = [{"title": "Source Video", "url": youtube_url, "snippet": f"Video ID: {video_id}"}]
 
 
1339
 
1340
  memory.add(ws, "assistant", answer)
1341
  print(f" βœ… Video Brain: Response generated")
 
1197
 
1198
 
1199
  # =======================================================
1200
+ # VIDEO BRAIN ENDPOINT - YouTube Video Analysis with Transcript + Web Fallback
1201
  # =======================================================
1202
  from tools.youtube_tool import YouTubeTool
1203
  youtube_tool = YouTubeTool()
 
1215
  @app.post("/api/video_brain", response_model=ChatResponse)
1216
  def video_brain_mode(req: VideoBrainRequest):
1217
  """
1218
+ Video Brain Mode - Analyzes YouTube videos.
1219
+ Uses transcript extraction with web search fallback.
1220
  """
1221
  q = req.message.strip()
1222
  ws = req.workspace_id
 
1237
  workspace_id=ws
1238
  )
1239
 
 
1240
  video_id = youtube_tool.extract_video_id(youtube_url)
1241
  cache_key = f"{ws}_{video_id}"
1242
 
1243
+ # Try to get transcript
1244
+ transcript_text = ""
1245
  transcript_data = None
1246
+
1247
  if cache_key in video_transcripts:
1248
  transcript_data = video_transcripts[cache_key]
1249
+ if transcript_data.get("success"):
1250
+ transcript_text = transcript_data.get("transcript", "")[:8000]
1251
+ print(f" πŸ“‹ Using cached transcript")
1252
+
1253
+ if not transcript_text:
1254
+ # Try fresh transcript fetch
1255
  print(f" πŸ”„ Fetching transcript for video: {video_id}")
1256
  transcript_data = youtube_tool.get_transcript(youtube_url)
1257
+
1258
+ if transcript_data.get("success"):
1259
  video_transcripts[cache_key] = transcript_data
1260
+ transcript_text = transcript_data.get("transcript", "")[:8000]
1261
+ print(f" βœ… Transcript fetched: {len(transcript_text)} chars")
1262
  else:
1263
+ print(f" ⚠️ Transcript failed: {transcript_data.get('error')}")
1264
 
1265
+ # If no transcript, use web search fallback
1266
+ video_context = ""
1267
+ sources = []
1268
+ links = []
1269
+
1270
+ if not transcript_text:
1271
+ print(f" 🌐 Using web search fallback...")
1272
+ try:
1273
+ # Search for video info and summaries
1274
+ if search_tool:
1275
+ search_queries = [
1276
+ f"youtube video {video_id} summary transcript",
1277
+ f"youtube {video_id} key points explained"
1278
+ ]
1279
+
1280
+ for sq in search_queries[:1]: # Just one search to save time
1281
+ results = search_tool.search(sq, num_results=4)
1282
+
1283
+ # Get Tavily AI answer
1284
+ if results and results[0].get("tavily_answer"):
1285
+ video_context += f"[Video Summary]: {results[0]['tavily_answer']}\n\n"
1286
+
1287
+ for r in results:
1288
+ url = r.get("url", "")
1289
+ title = r.get("title", "")
1290
+ content = r.get("content", "")
1291
+
1292
+ if content:
1293
+ video_context += f"[{title}]: {content[:1000]}\n\n"
1294
+ links.append({"title": title, "url": url, "snippet": content[:150]})
1295
+ sources.append({"title": title, "url": url})
1296
+
1297
+ print(f" πŸ“„ Web fallback gathered: {len(video_context)} chars, {len(sources)} sources")
1298
+ except Exception as e:
1299
+ print(f" ❌ Web search fallback error: {e}")
1300
 
1301
+ # Build prompt
1302
  q_lower = q.lower()
1303
  is_summary = any(word in q_lower for word in ["summarize", "summary", "overview", "main points", "key takeaways", "what is this about"])
1304
 
1305
  if transcript_text:
1306
+ # Have real transcript
1307
  if is_summary:
1308
  prompt = f"""You are VIDEO BRAIN AI - an expert at analyzing YouTube videos.
1309
 
 
1331
  - Be specific and cite timestamps when relevant
1332
  - If the answer is not in the transcript, say so honestly
1333
  - Format your response clearly with bullet points if appropriate"""
1334
+
1335
+ sources = [{"title": "πŸŽ₯ YouTube Video (Transcript)", "url": youtube_url}]
1336
+ links = [{"title": "Source Video", "url": youtube_url, "snippet": f"Video ID: {video_id} - Full transcript available"}]
1337
+
1338
+ elif video_context:
1339
+ # Have web search fallback context
1340
+ prompt = f"""You are VIDEO BRAIN AI. I couldn't get the direct transcript, but found related information about this video.
1341
+
1342
+ VIDEO URL: {youtube_url}
1343
+ VIDEO ID: {video_id}
1344
+
1345
+ AVAILABLE INFORMATION FROM WEB:
1346
+ {video_context[:6000]}
1347
+
1348
+ USER QUESTION: {q}
1349
+
1350
+ Based on the available information:
1351
+ 1. Answer the user's question as best as you can
1352
+ 2. Be clear that this is based on web search results, not the actual transcript
1353
+ 3. If summarizing, provide the key points found
1354
+ 4. Suggest the user can paste the transcript directly for more accurate analysis"""
1355
  else:
1356
+ # No information available
1357
  error_msg = transcript_data.get("error", "Unknown error") if transcript_data else "Could not fetch transcript"
1358
+ prompt = f"""I couldn't analyze the YouTube video.
1359
 
1360
  Video URL: {youtube_url}
1361
  Error: {error_msg}
1362
  User Question: {q}
1363
 
1364
+ Please explain:
1365
+ 1. Why the transcript couldn't be fetched (network/DNS issues on this server)
1366
+ 2. Alternative: The user can:
1367
+ - Open YouTube, click "..." under the video, select "Show transcript"
1368
+ - Copy and paste the transcript text here
1369
+ - I can then analyze it accurately
1370
+ 3. Or they can try a different video"""
1371
 
1372
  try:
1373
  msgs = build_context(ws, prompt)
 
1376
  print(f" ❌ LLM error: {e}")
1377
  answer = f"Error generating response: {str(e)[:100]}"
1378
 
1379
+ # Follow-up questions
1380
+ if transcript_text or video_context:
1381
  follow = [
1382
  "What are the main arguments or points made?",
1383
  "Summarize this in 3 bullet points",
 
1387
  ]
1388
  else:
1389
  follow = [
1390
+ "Paste the transcript text here",
1391
  "Try a different YouTube video",
1392
+ "How do I get a YouTube transcript?"
 
1393
  ]
1394
 
1395
+ # Add video source if not already added
1396
+ if not sources:
1397
+ sources = [{"title": "πŸŽ₯ YouTube Video", "url": youtube_url}]
1398
+ if not links:
1399
+ links = [{"title": "Source Video", "url": youtube_url, "snippet": f"Video ID: {video_id}"}]
1400
 
1401
  memory.add(ws, "assistant", answer)
1402
  print(f" βœ… Video Brain: Response generated")
tools/youtube_tool.py CHANGED
@@ -6,12 +6,19 @@ Extracts transcripts from YouTube videos for Video Brain mode.
6
 
7
  import re
8
  from typing import Dict, Optional
9
- from youtube_transcript_api import YouTubeTranscriptApi
10
- from youtube_transcript_api._errors import (
11
- TranscriptsDisabled,
12
- NoTranscriptFound,
13
- VideoUnavailable
14
- )
 
 
 
 
 
 
 
15
 
16
 
17
  class YouTubeTool:
@@ -53,6 +60,15 @@ class YouTubeTool:
53
  "video_id": None
54
  }
55
 
 
 
 
 
 
 
 
 
 
56
  try:
57
  # Try to get transcript (auto-generated or manual)
58
  transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
@@ -107,7 +123,7 @@ class YouTubeTool:
107
  "video_id": video_id
108
  }
109
 
110
- except TranscriptsDisabled:
111
  return {
112
  "success": False,
113
  "error": "Transcripts are disabled for this video",
@@ -115,7 +131,7 @@ class YouTubeTool:
115
  "segments": [],
116
  "video_id": video_id
117
  }
118
- except NoTranscriptFound:
119
  return {
120
  "success": False,
121
  "error": "No transcript found for this video",
@@ -123,7 +139,7 @@ class YouTubeTool:
123
  "segments": [],
124
  "video_id": video_id
125
  }
126
- except VideoUnavailable:
127
  return {
128
  "success": False,
129
  "error": "Video is unavailable",
@@ -132,9 +148,20 @@ class YouTubeTool:
132
  "video_id": video_id
133
  }
134
  except Exception as e:
 
 
 
 
 
 
 
 
 
 
 
135
  return {
136
  "success": False,
137
- "error": f"Error fetching transcript: {str(e)}",
138
  "transcript": "",
139
  "segments": [],
140
  "video_id": video_id
 
6
 
7
  import re
8
  from typing import Dict, Optional
9
+
10
+ # Try to import youtube_transcript_api, but handle if it fails
11
+ try:
12
+ from youtube_transcript_api import YouTubeTranscriptApi
13
+ from youtube_transcript_api._errors import (
14
+ TranscriptsDisabled,
15
+ NoTranscriptFound,
16
+ VideoUnavailable
17
+ )
18
+ YOUTUBE_API_AVAILABLE = True
19
+ except ImportError:
20
+ YOUTUBE_API_AVAILABLE = False
21
+ print("⚠️ youtube-transcript-api not available")
22
 
23
 
24
  class YouTubeTool:
 
60
  "video_id": None
61
  }
62
 
63
+ if not YOUTUBE_API_AVAILABLE:
64
+ return {
65
+ "success": False,
66
+ "error": "YouTube transcript API not available",
67
+ "transcript": "",
68
+ "segments": [],
69
+ "video_id": video_id
70
+ }
71
+
72
  try:
73
  # Try to get transcript (auto-generated or manual)
74
  transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
 
123
  "video_id": video_id
124
  }
125
 
126
+ except TranscriptsDisabled if YOUTUBE_API_AVAILABLE else Exception:
127
  return {
128
  "success": False,
129
  "error": "Transcripts are disabled for this video",
 
131
  "segments": [],
132
  "video_id": video_id
133
  }
134
+ except NoTranscriptFound if YOUTUBE_API_AVAILABLE else Exception:
135
  return {
136
  "success": False,
137
  "error": "No transcript found for this video",
 
139
  "segments": [],
140
  "video_id": video_id
141
  }
142
+ except VideoUnavailable if YOUTUBE_API_AVAILABLE else Exception:
143
  return {
144
  "success": False,
145
  "error": "Video is unavailable",
 
148
  "video_id": video_id
149
  }
150
  except Exception as e:
151
+ error_msg = str(e)
152
+ # Check for network errors
153
+ if "NameResolutionError" in error_msg or "Failed to resolve" in error_msg:
154
+ return {
155
+ "success": False,
156
+ "error": "Network error: Cannot connect to YouTube (DNS resolution failed)",
157
+ "transcript": "",
158
+ "segments": [],
159
+ "video_id": video_id,
160
+ "network_error": True
161
+ }
162
  return {
163
  "success": False,
164
+ "error": f"Error fetching transcript: {error_msg[:200]}",
165
  "transcript": "",
166
  "segments": [],
167
  "video_id": video_id