Spaces:
Running
Running
Commit ·
8e5db5f
1
Parent(s): b47dcd2
Add web search fallback for Video Brain when YouTube transcript fails due to network issues
Browse files
- app/api.py +93 -32
- tools/youtube_tool.py +37 -10
app/api.py
CHANGED
|
@@ -1197,7 +1197,7 @@ Be detailed, practical, and use real-world best practices. Make it production-re
|
|
| 1197 |
|
| 1198 |
|
| 1199 |
# =======================================================
|
| 1200 |
-
# VIDEO BRAIN ENDPOINT - YouTube Video Analysis with Transcript
|
| 1201 |
# =======================================================
|
| 1202 |
from tools.youtube_tool import YouTubeTool
|
| 1203 |
youtube_tool = YouTubeTool()
|
|
@@ -1215,8 +1215,8 @@ class VideoBrainRequest(BaseModel):
|
|
| 1215 |
@app.post("/api/video_brain", response_model=ChatResponse)
|
| 1216 |
def video_brain_mode(req: VideoBrainRequest):
|
| 1217 |
"""
|
| 1218 |
-
Video Brain Mode - Analyzes YouTube videos
|
| 1219 |
-
|
| 1220 |
"""
|
| 1221 |
q = req.message.strip()
|
| 1222 |
ws = req.workspace_id
|
|
@@ -1237,35 +1237,73 @@ def video_brain_mode(req: VideoBrainRequest):
|
|
| 1237 |
workspace_id=ws
|
| 1238 |
)
|
| 1239 |
|
| 1240 |
-
# Check if we already have transcript for this video
|
| 1241 |
video_id = youtube_tool.extract_video_id(youtube_url)
|
| 1242 |
cache_key = f"{ws}_{video_id}"
|
| 1243 |
|
|
|
|
|
|
|
| 1244 |
transcript_data = None
|
|
|
|
| 1245 |
if cache_key in video_transcripts:
|
| 1246 |
transcript_data = video_transcripts[cache_key]
|
| 1247 |
-
|
| 1248 |
-
|
| 1249 |
-
|
|
|
|
|
|
|
|
|
|
| 1250 |
print(f" π Fetching transcript for video: {video_id}")
|
| 1251 |
transcript_data = youtube_tool.get_transcript(youtube_url)
|
| 1252 |
-
|
|
|
|
| 1253 |
video_transcripts[cache_key] = transcript_data
|
| 1254 |
-
|
|
|
|
| 1255 |
else:
|
| 1256 |
-
print(f" β οΈ Transcript
|
| 1257 |
|
| 1258 |
-
#
|
| 1259 |
-
|
| 1260 |
-
|
| 1261 |
-
|
| 1262 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1263 |
|
| 1264 |
-
#
|
| 1265 |
q_lower = q.lower()
|
| 1266 |
is_summary = any(word in q_lower for word in ["summarize", "summary", "overview", "main points", "key takeaways", "what is this about"])
|
| 1267 |
|
| 1268 |
if transcript_text:
|
|
|
|
| 1269 |
if is_summary:
|
| 1270 |
prompt = f"""You are VIDEO BRAIN AI - an expert at analyzing YouTube videos.
|
| 1271 |
|
|
@@ -1293,22 +1331,43 @@ Answer the question using ONLY the information from the transcript above.
|
|
| 1293 |
- Be specific and cite timestamps when relevant
|
| 1294 |
- If the answer is not in the transcript, say so honestly
|
| 1295 |
- Format your response clearly with bullet points if appropriate"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1296 |
else:
|
| 1297 |
-
# No
|
| 1298 |
error_msg = transcript_data.get("error", "Unknown error") if transcript_data else "Could not fetch transcript"
|
| 1299 |
-
prompt = f"""
|
| 1300 |
|
| 1301 |
Video URL: {youtube_url}
|
| 1302 |
Error: {error_msg}
|
| 1303 |
User Question: {q}
|
| 1304 |
|
| 1305 |
-
|
| 1306 |
-
1.
|
| 1307 |
-
2.
|
| 1308 |
-
-
|
| 1309 |
-
-
|
| 1310 |
-
-
|
| 1311 |
-
3.
|
| 1312 |
|
| 1313 |
try:
|
| 1314 |
msgs = build_context(ws, prompt)
|
|
@@ -1317,8 +1376,8 @@ Explain that:
|
|
| 1317 |
print(f" β LLM error: {e}")
|
| 1318 |
answer = f"Error generating response: {str(e)[:100]}"
|
| 1319 |
|
| 1320 |
-
#
|
| 1321 |
-
if transcript_text:
|
| 1322 |
follow = [
|
| 1323 |
"What are the main arguments or points made?",
|
| 1324 |
"Summarize this in 3 bullet points",
|
|
@@ -1328,14 +1387,16 @@ Explain that:
|
|
| 1328 |
]
|
| 1329 |
else:
|
| 1330 |
follow = [
|
|
|
|
| 1331 |
"Try a different YouTube video",
|
| 1332 |
-
"How do I
|
| 1333 |
-
"What videos work best with Video Brain?"
|
| 1334 |
]
|
| 1335 |
|
| 1336 |
-
#
|
| 1337 |
-
|
| 1338 |
-
|
|
|
|
|
|
|
| 1339 |
|
| 1340 |
memory.add(ws, "assistant", answer)
|
| 1341 |
print(f" ✅ Video Brain: Response generated")
|
|
|
|
| 1197 |
|
| 1198 |
|
| 1199 |
# =======================================================
|
| 1200 |
+
# VIDEO BRAIN ENDPOINT - YouTube Video Analysis with Transcript + Web Fallback
|
| 1201 |
# =======================================================
|
| 1202 |
from tools.youtube_tool import YouTubeTool
|
| 1203 |
youtube_tool = YouTubeTool()
|
|
|
|
| 1215 |
@app.post("/api/video_brain", response_model=ChatResponse)
|
| 1216 |
def video_brain_mode(req: VideoBrainRequest):
|
| 1217 |
"""
|
| 1218 |
+
Video Brain Mode - Analyzes YouTube videos.
|
| 1219 |
+
Uses transcript extraction with web search fallback.
|
| 1220 |
"""
|
| 1221 |
q = req.message.strip()
|
| 1222 |
ws = req.workspace_id
|
|
|
|
| 1237 |
workspace_id=ws
|
| 1238 |
)
|
| 1239 |
|
|
|
|
| 1240 |
video_id = youtube_tool.extract_video_id(youtube_url)
|
| 1241 |
cache_key = f"{ws}_{video_id}"
|
| 1242 |
|
| 1243 |
+
# Try to get transcript
|
| 1244 |
+
transcript_text = ""
|
| 1245 |
transcript_data = None
|
| 1246 |
+
|
| 1247 |
if cache_key in video_transcripts:
|
| 1248 |
transcript_data = video_transcripts[cache_key]
|
| 1249 |
+
if transcript_data.get("success"):
|
| 1250 |
+
transcript_text = transcript_data.get("transcript", "")[:8000]
|
| 1251 |
+
print(f" π Using cached transcript")
|
| 1252 |
+
|
| 1253 |
+
if not transcript_text:
|
| 1254 |
+
# Try fresh transcript fetch
|
| 1255 |
print(f" π Fetching transcript for video: {video_id}")
|
| 1256 |
transcript_data = youtube_tool.get_transcript(youtube_url)
|
| 1257 |
+
|
| 1258 |
+
if transcript_data.get("success"):
|
| 1259 |
video_transcripts[cache_key] = transcript_data
|
| 1260 |
+
transcript_text = transcript_data.get("transcript", "")[:8000]
|
| 1261 |
+
print(f" ✅ Transcript fetched: {len(transcript_text)} chars")
|
| 1262 |
else:
|
| 1263 |
+
print(f" ⚠️ Transcript failed: {transcript_data.get('error')}")
|
| 1264 |
|
| 1265 |
+
# If no transcript, use web search fallback
|
| 1266 |
+
video_context = ""
|
| 1267 |
+
sources = []
|
| 1268 |
+
links = []
|
| 1269 |
+
|
| 1270 |
+
if not transcript_text:
|
| 1271 |
+
print(f" π Using web search fallback...")
|
| 1272 |
+
try:
|
| 1273 |
+
# Search for video info and summaries
|
| 1274 |
+
if search_tool:
|
| 1275 |
+
search_queries = [
|
| 1276 |
+
f"youtube video {video_id} summary transcript",
|
| 1277 |
+
f"youtube {video_id} key points explained"
|
| 1278 |
+
]
|
| 1279 |
+
|
| 1280 |
+
for sq in search_queries[:1]: # Just one search to save time
|
| 1281 |
+
results = search_tool.search(sq, num_results=4)
|
| 1282 |
+
|
| 1283 |
+
# Get Tavily AI answer
|
| 1284 |
+
if results and results[0].get("tavily_answer"):
|
| 1285 |
+
video_context += f"[Video Summary]: {results[0]['tavily_answer']}\n\n"
|
| 1286 |
+
|
| 1287 |
+
for r in results:
|
| 1288 |
+
url = r.get("url", "")
|
| 1289 |
+
title = r.get("title", "")
|
| 1290 |
+
content = r.get("content", "")
|
| 1291 |
+
|
| 1292 |
+
if content:
|
| 1293 |
+
video_context += f"[{title}]: {content[:1000]}\n\n"
|
| 1294 |
+
links.append({"title": title, "url": url, "snippet": content[:150]})
|
| 1295 |
+
sources.append({"title": title, "url": url})
|
| 1296 |
+
|
| 1297 |
+
print(f" π Web fallback gathered: {len(video_context)} chars, {len(sources)} sources")
|
| 1298 |
+
except Exception as e:
|
| 1299 |
+
print(f" β Web search fallback error: {e}")
|
| 1300 |
|
| 1301 |
+
# Build prompt
|
| 1302 |
q_lower = q.lower()
|
| 1303 |
is_summary = any(word in q_lower for word in ["summarize", "summary", "overview", "main points", "key takeaways", "what is this about"])
|
| 1304 |
|
| 1305 |
if transcript_text:
|
| 1306 |
+
# Have real transcript
|
| 1307 |
if is_summary:
|
| 1308 |
prompt = f"""You are VIDEO BRAIN AI - an expert at analyzing YouTube videos.
|
| 1309 |
|
|
|
|
| 1331 |
- Be specific and cite timestamps when relevant
|
| 1332 |
- If the answer is not in the transcript, say so honestly
|
| 1333 |
- Format your response clearly with bullet points if appropriate"""
|
| 1334 |
+
|
| 1335 |
+
sources = [{"title": "π₯ YouTube Video (Transcript)", "url": youtube_url}]
|
| 1336 |
+
links = [{"title": "Source Video", "url": youtube_url, "snippet": f"Video ID: {video_id} - Full transcript available"}]
|
| 1337 |
+
|
| 1338 |
+
elif video_context:
|
| 1339 |
+
# Have web search fallback context
|
| 1340 |
+
prompt = f"""You are VIDEO BRAIN AI. I couldn't get the direct transcript, but found related information about this video.
|
| 1341 |
+
|
| 1342 |
+
VIDEO URL: {youtube_url}
|
| 1343 |
+
VIDEO ID: {video_id}
|
| 1344 |
+
|
| 1345 |
+
AVAILABLE INFORMATION FROM WEB:
|
| 1346 |
+
{video_context[:6000]}
|
| 1347 |
+
|
| 1348 |
+
USER QUESTION: {q}
|
| 1349 |
+
|
| 1350 |
+
Based on the available information:
|
| 1351 |
+
1. Answer the user's question as best as you can
|
| 1352 |
+
2. Be clear that this is based on web search results, not the actual transcript
|
| 1353 |
+
3. If summarizing, provide the key points found
|
| 1354 |
+
4. Suggest the user can paste the transcript directly for more accurate analysis"""
|
| 1355 |
else:
|
| 1356 |
+
# No information available
|
| 1357 |
error_msg = transcript_data.get("error", "Unknown error") if transcript_data else "Could not fetch transcript"
|
| 1358 |
+
prompt = f"""I couldn't analyze the YouTube video.
|
| 1359 |
|
| 1360 |
Video URL: {youtube_url}
|
| 1361 |
Error: {error_msg}
|
| 1362 |
User Question: {q}
|
| 1363 |
|
| 1364 |
+
Please explain:
|
| 1365 |
+
1. Why the transcript couldn't be fetched (network/DNS issues on this server)
|
| 1366 |
+
2. Alternative: The user can:
|
| 1367 |
+
- Open YouTube, click "..." under the video, select "Show transcript"
|
| 1368 |
+
- Copy and paste the transcript text here
|
| 1369 |
+
- I can then analyze it accurately
|
| 1370 |
+
3. Or they can try a different video"""
|
| 1371 |
|
| 1372 |
try:
|
| 1373 |
msgs = build_context(ws, prompt)
|
|
|
|
| 1376 |
print(f" β LLM error: {e}")
|
| 1377 |
answer = f"Error generating response: {str(e)[:100]}"
|
| 1378 |
|
| 1379 |
+
# Follow-up questions
|
| 1380 |
+
if transcript_text or video_context:
|
| 1381 |
follow = [
|
| 1382 |
"What are the main arguments or points made?",
|
| 1383 |
"Summarize this in 3 bullet points",
|
|
|
|
| 1387 |
]
|
| 1388 |
else:
|
| 1389 |
follow = [
|
| 1390 |
+
"Paste the transcript text here",
|
| 1391 |
"Try a different YouTube video",
|
| 1392 |
+
"How do I get a YouTube transcript?"
|
|
|
|
| 1393 |
]
|
| 1394 |
|
| 1395 |
+
# Add video source if not already added
|
| 1396 |
+
if not sources:
|
| 1397 |
+
sources = [{"title": "π₯ YouTube Video", "url": youtube_url}]
|
| 1398 |
+
if not links:
|
| 1399 |
+
links = [{"title": "Source Video", "url": youtube_url, "snippet": f"Video ID: {video_id}"}]
|
| 1400 |
|
| 1401 |
memory.add(ws, "assistant", answer)
|
| 1402 |
print(f" ✅ Video Brain: Response generated")
|
tools/youtube_tool.py
CHANGED
|
@@ -6,12 +6,19 @@ Extracts transcripts from YouTube videos for Video Brain mode.
|
|
| 6 |
|
| 7 |
import re
|
| 8 |
from typing import Dict, Optional
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
|
| 16 |
|
| 17 |
class YouTubeTool:
|
|
@@ -53,6 +60,15 @@ class YouTubeTool:
|
|
| 53 |
"video_id": None
|
| 54 |
}
|
| 55 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
try:
|
| 57 |
# Try to get transcript (auto-generated or manual)
|
| 58 |
transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
|
|
@@ -107,7 +123,7 @@ class YouTubeTool:
|
|
| 107 |
"video_id": video_id
|
| 108 |
}
|
| 109 |
|
| 110 |
-
except TranscriptsDisabled:
|
| 111 |
return {
|
| 112 |
"success": False,
|
| 113 |
"error": "Transcripts are disabled for this video",
|
|
@@ -115,7 +131,7 @@ class YouTubeTool:
|
|
| 115 |
"segments": [],
|
| 116 |
"video_id": video_id
|
| 117 |
}
|
| 118 |
-
except NoTranscriptFound:
|
| 119 |
return {
|
| 120 |
"success": False,
|
| 121 |
"error": "No transcript found for this video",
|
|
@@ -123,7 +139,7 @@ class YouTubeTool:
|
|
| 123 |
"segments": [],
|
| 124 |
"video_id": video_id
|
| 125 |
}
|
| 126 |
-
except VideoUnavailable:
|
| 127 |
return {
|
| 128 |
"success": False,
|
| 129 |
"error": "Video is unavailable",
|
|
@@ -132,9 +148,20 @@ class YouTubeTool:
|
|
| 132 |
"video_id": video_id
|
| 133 |
}
|
| 134 |
except Exception as e:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 135 |
return {
|
| 136 |
"success": False,
|
| 137 |
-
"error": f"Error fetching transcript: {
|
| 138 |
"transcript": "",
|
| 139 |
"segments": [],
|
| 140 |
"video_id": video_id
|
|
|
|
| 6 |
|
| 7 |
import re
|
| 8 |
from typing import Dict, Optional
|
| 9 |
+
|
| 10 |
+
# Try to import youtube_transcript_api, but handle if it fails
|
| 11 |
+
try:
|
| 12 |
+
from youtube_transcript_api import YouTubeTranscriptApi
|
| 13 |
+
from youtube_transcript_api._errors import (
|
| 14 |
+
TranscriptsDisabled,
|
| 15 |
+
NoTranscriptFound,
|
| 16 |
+
VideoUnavailable
|
| 17 |
+
)
|
| 18 |
+
YOUTUBE_API_AVAILABLE = True
|
| 19 |
+
except ImportError:
|
| 20 |
+
YOUTUBE_API_AVAILABLE = False
|
| 21 |
+
print("⚠️ youtube-transcript-api not available")
|
| 22 |
|
| 23 |
|
| 24 |
class YouTubeTool:
|
|
|
|
| 60 |
"video_id": None
|
| 61 |
}
|
| 62 |
|
| 63 |
+
if not YOUTUBE_API_AVAILABLE:
|
| 64 |
+
return {
|
| 65 |
+
"success": False,
|
| 66 |
+
"error": "YouTube transcript API not available",
|
| 67 |
+
"transcript": "",
|
| 68 |
+
"segments": [],
|
| 69 |
+
"video_id": video_id
|
| 70 |
+
}
|
| 71 |
+
|
| 72 |
try:
|
| 73 |
# Try to get transcript (auto-generated or manual)
|
| 74 |
transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
|
|
|
|
| 123 |
"video_id": video_id
|
| 124 |
}
|
| 125 |
|
| 126 |
+
except TranscriptsDisabled if YOUTUBE_API_AVAILABLE else Exception:
|
| 127 |
return {
|
| 128 |
"success": False,
|
| 129 |
"error": "Transcripts are disabled for this video",
|
|
|
|
| 131 |
"segments": [],
|
| 132 |
"video_id": video_id
|
| 133 |
}
|
| 134 |
+
except NoTranscriptFound if YOUTUBE_API_AVAILABLE else Exception:
|
| 135 |
return {
|
| 136 |
"success": False,
|
| 137 |
"error": "No transcript found for this video",
|
|
|
|
| 139 |
"segments": [],
|
| 140 |
"video_id": video_id
|
| 141 |
}
|
| 142 |
+
except VideoUnavailable if YOUTUBE_API_AVAILABLE else Exception:
|
| 143 |
return {
|
| 144 |
"success": False,
|
| 145 |
"error": "Video is unavailable",
|
|
|
|
| 148 |
"video_id": video_id
|
| 149 |
}
|
| 150 |
except Exception as e:
|
| 151 |
+
error_msg = str(e)
|
| 152 |
+
# Check for network errors
|
| 153 |
+
if "NameResolutionError" in error_msg or "Failed to resolve" in error_msg:
|
| 154 |
+
return {
|
| 155 |
+
"success": False,
|
| 156 |
+
"error": "Network error: Cannot connect to YouTube (DNS resolution failed)",
|
| 157 |
+
"transcript": "",
|
| 158 |
+
"segments": [],
|
| 159 |
+
"video_id": video_id,
|
| 160 |
+
"network_error": True
|
| 161 |
+
}
|
| 162 |
return {
|
| 163 |
"success": False,
|
| 164 |
+
"error": f"Error fetching transcript: {error_msg[:200]}",
|
| 165 |
"transcript": "",
|
| 166 |
"segments": [],
|
| 167 |
"video_id": video_id
|