Spaces:

Naveen-2007
/

perplexity-clone

Running

App Files Files Community

Naveen-2007 committed on Dec 7, 2025

Commit

8e5db5f

1 Parent(s): b47dcd2

Add web search fallback for Video Brain when YouTube transcript fails due to network issues

Browse files

Files changed (2) hide show

app/api.py +93 -32
tools/youtube_tool.py +37 -10

app/api.py CHANGED Viewed

@@ -1197,7 +1197,7 @@ Be detailed, practical, and use real-world best practices. Make it production-re
 # =======================================================
-# VIDEO BRAIN ENDPOINT - YouTube Video Analysis with Transcript
 # =======================================================
 from tools.youtube_tool import YouTubeTool
 youtube_tool = YouTubeTool()
@@ -1215,8 +1215,8 @@ class VideoBrainRequest(BaseModel):
 @app.post("/api/video_brain", response_model=ChatResponse)
 def video_brain_mode(req: VideoBrainRequest):
     """
-    Video Brain Mode - Analyzes YouTube videos using actual transcripts.
-    Extracts real transcript and provides accurate responses.
     """
     q = req.message.strip()
     ws = req.workspace_id
@@ -1237,35 +1237,73 @@ def video_brain_mode(req: VideoBrainRequest):
             workspace_id=ws
         )
-    # Check if we already have transcript for this video
     video_id = youtube_tool.extract_video_id(youtube_url)
     cache_key = f"{ws}_{video_id}"
     transcript_data = None
     if cache_key in video_transcripts:
         transcript_data = video_transcripts[cache_key]
-        print(f"  📋 Using cached transcript for {video_id}")
-    else:
-        # Fetch new transcript
         print(f"  🔄 Fetching transcript for video: {video_id}")
         transcript_data = youtube_tool.get_transcript(youtube_url)
-        if transcript_data["success"]:
             video_transcripts[cache_key] = transcript_data
-            print(f"  ✅ Transcript fetched: {len(transcript_data.get('transcript', ''))} chars")
         else:
-            print(f"  ⚠️ Transcript fetch failed: {transcript_data.get('error')}")
-    # Build context for LLM
-    transcript_text = ""
-    if transcript_data and transcript_data.get("success"):
-        # Use clean transcript for context (with timestamps)
-        transcript_text = transcript_data.get("transcript", "")[:8000]  # Limit for context window
-    # Generate appropriate prompt based on question type
     q_lower = q.lower()
     is_summary = any(word in q_lower for word in ["summarize", "summary", "overview", "main points", "key takeaways", "what is this about"])
     if transcript_text:
         if is_summary:
             prompt = f"""You are VIDEO BRAIN AI - an expert at analyzing YouTube videos.
@@ -1293,22 +1331,43 @@ Answer the question using ONLY the information from the transcript above.
 - Be specific and cite timestamps when relevant
 - If the answer is not in the transcript, say so honestly
 - Format your response clearly with bullet points if appropriate"""
     else:
-        # No transcript available - explain why
         error_msg = transcript_data.get("error", "Unknown error") if transcript_data else "Could not fetch transcript"
-        prompt = f"""The user asked about a YouTube video but I couldn't extract the transcript.
 Video URL: {youtube_url}
 Error: {error_msg}
 User Question: {q}
-Explain that:
-1. The transcript couldn't be fetched (reason: {error_msg})
-2. Suggest they try:
-   - A different video that has captions/subtitles enabled
-   - Checking if the video is public and available
-   - Using YouTube's built-in transcript feature (click ... > Show transcript)
-3. Offer to help once they can provide the transcript text manually"""
     try:
         msgs = build_context(ws, prompt)
@@ -1317,8 +1376,8 @@ Explain that:
         print(f"  ❌ LLM error: {e}")
         answer = f"Error generating response: {str(e)[:100]}"
-    # Generate contextual follow-up questions
-    if transcript_text:
         follow = [
             "What are the main arguments or points made?",
             "Summarize this in 3 bullet points",
@@ -1328,14 +1387,16 @@ Explain that:
         ]
     else:
         follow = [
             "Try a different YouTube video",
-            "How do I enable captions on YouTube?",
-            "What videos work best with Video Brain?"
         ]
-    # Build sources
-    sources = [{"title": f"🎥 YouTube Video", "url": youtube_url}]
-    links = [{"title": "Source Video", "url": youtube_url, "snippet": f"Video ID: {video_id}"}]
     memory.add(ws, "assistant", answer)
     print(f"  ✅ Video Brain: Response generated")

 # =======================================================
+# VIDEO BRAIN ENDPOINT - YouTube Video Analysis with Transcript + Web Fallback
 # =======================================================
 from tools.youtube_tool import YouTubeTool
 youtube_tool = YouTubeTool()
 @app.post("/api/video_brain", response_model=ChatResponse)
 def video_brain_mode(req: VideoBrainRequest):
     """
+    Video Brain Mode - Analyzes YouTube videos.
+    Uses transcript extraction with web search fallback.
     """
     q = req.message.strip()
     ws = req.workspace_id
             workspace_id=ws
         )
     video_id = youtube_tool.extract_video_id(youtube_url)
     cache_key = f"{ws}_{video_id}"
+    # Try to get transcript
+    transcript_text = ""
     transcript_data = None
     if cache_key in video_transcripts:
         transcript_data = video_transcripts[cache_key]
+        if transcript_data.get("success"):
+            transcript_text = transcript_data.get("transcript", "")[:8000]
+            print(f"  📋 Using cached transcript")
+    if not transcript_text:
+        # Try fresh transcript fetch
         print(f"  🔄 Fetching transcript for video: {video_id}")
         transcript_data = youtube_tool.get_transcript(youtube_url)
+        if transcript_data.get("success"):
             video_transcripts[cache_key] = transcript_data
+            transcript_text = transcript_data.get("transcript", "")[:8000]
+            print(f"  ✅ Transcript fetched: {len(transcript_text)} chars")
         else:
+            print(f"  ⚠️ Transcript failed: {transcript_data.get('error')}")
+    # If no transcript, use web search fallback
+    video_context = ""
+    sources = []
+    links = []
+    if not transcript_text:
+        print(f"  🌐 Using web search fallback...")
+        try:
+            # Search for video info and summaries
+            if search_tool:
+                search_queries = [
+                    f"youtube video {video_id} summary transcript",
+                    f"youtube {video_id} key points explained"
+                ]
+                for sq in search_queries[:1]:  # Just one search to save time
+                    results = search_tool.search(sq, num_results=4)
+                    # Get Tavily AI answer
+                    if results and results[0].get("tavily_answer"):
+                        video_context += f"[Video Summary]: {results[0]['tavily_answer']}\n\n"
+                    for r in results:
+                        url = r.get("url", "")
+                        title = r.get("title", "")
+                        content = r.get("content", "")
+                        if content:
+                            video_context += f"[{title}]: {content[:1000]}\n\n"
+                            links.append({"title": title, "url": url, "snippet": content[:150]})
+                            sources.append({"title": title, "url": url})
+                print(f"  📄 Web fallback gathered: {len(video_context)} chars, {len(sources)} sources")
+        except Exception as e:
+            print(f"  ❌ Web search fallback error: {e}")
+    # Build prompt
     q_lower = q.lower()
     is_summary = any(word in q_lower for word in ["summarize", "summary", "overview", "main points", "key takeaways", "what is this about"])
     if transcript_text:
+        # Have real transcript
         if is_summary:
             prompt = f"""You are VIDEO BRAIN AI - an expert at analyzing YouTube videos.
 - Be specific and cite timestamps when relevant
 - If the answer is not in the transcript, say so honestly
 - Format your response clearly with bullet points if appropriate"""
+        sources = [{"title": "🎥 YouTube Video (Transcript)", "url": youtube_url}]
+        links = [{"title": "Source Video", "url": youtube_url, "snippet": f"Video ID: {video_id} - Full transcript available"}]
+    elif video_context:
+        # Have web search fallback context
+        prompt = f"""You are VIDEO BRAIN AI. I couldn't get the direct transcript, but found related information about this video.
+VIDEO URL: {youtube_url}
+VIDEO ID: {video_id}
+AVAILABLE INFORMATION FROM WEB:
+{video_context[:6000]}
+USER QUESTION: {q}
+Based on the available information:
+1. Answer the user's question as best as you can
+2. Be clear that this is based on web search results, not the actual transcript
+3. If summarizing, provide the key points found
+4. Suggest the user can paste the transcript directly for more accurate analysis"""
     else:
+        # No information available
         error_msg = transcript_data.get("error", "Unknown error") if transcript_data else "Could not fetch transcript"
+        prompt = f"""I couldn't analyze the YouTube video.
 Video URL: {youtube_url}
 Error: {error_msg}
 User Question: {q}
+Please explain:
+1. Why the transcript couldn't be fetched (network/DNS issues on this server)
+2. Alternative: The user can:
+   - Open YouTube, click "..." under the video, select "Show transcript"
+   - Copy and paste the transcript text here
+   - I can then analyze it accurately
+3. Or they can try a different video"""
     try:
         msgs = build_context(ws, prompt)
         print(f"  ❌ LLM error: {e}")
         answer = f"Error generating response: {str(e)[:100]}"
+    # Follow-up questions
+    if transcript_text or video_context:
         follow = [
             "What are the main arguments or points made?",
             "Summarize this in 3 bullet points",
         ]
     else:
         follow = [
+            "Paste the transcript text here",
             "Try a different YouTube video",
+            "How do I get a YouTube transcript?"
         ]
+    # Add video source if not already added
+    if not sources:
+        sources = [{"title": "🎥 YouTube Video", "url": youtube_url}]
+    if not links:
+        links = [{"title": "Source Video", "url": youtube_url, "snippet": f"Video ID: {video_id}"}]
     memory.add(ws, "assistant", answer)
     print(f"  ✅ Video Brain: Response generated")

tools/youtube_tool.py CHANGED Viewed

@@ -6,12 +6,19 @@ Extracts transcripts from YouTube videos for Video Brain mode.
 import re
 from typing import Dict, Optional
-from youtube_transcript_api import YouTubeTranscriptApi
-from youtube_transcript_api._errors import (
-    TranscriptsDisabled,
-    NoTranscriptFound,
-    VideoUnavailable
-)
 class YouTubeTool:
@@ -53,6 +60,15 @@ class YouTubeTool:
                 "video_id": None
             }
         try:
             # Try to get transcript (auto-generated or manual)
             transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
@@ -107,7 +123,7 @@ class YouTubeTool:
                     "video_id": video_id
                 }
-        except TranscriptsDisabled:
             return {
                 "success": False,
                 "error": "Transcripts are disabled for this video",
@@ -115,7 +131,7 @@ class YouTubeTool:
                 "segments": [],
                 "video_id": video_id
             }
-        except NoTranscriptFound:
             return {
                 "success": False,
                 "error": "No transcript found for this video",
@@ -123,7 +139,7 @@ class YouTubeTool:
                 "segments": [],
                 "video_id": video_id
             }
-        except VideoUnavailable:
             return {
                 "success": False,
                 "error": "Video is unavailable",
@@ -132,9 +148,20 @@ class YouTubeTool:
                 "video_id": video_id
             }
         except Exception as e:
             return {
                 "success": False,
-                "error": f"Error fetching transcript: {str(e)}",
                 "transcript": "",
                 "segments": [],
                 "video_id": video_id

 import re
 from typing import Dict, Optional
+# Try to import youtube_transcript_api, but handle if it fails
+try:
+    from youtube_transcript_api import YouTubeTranscriptApi
+    from youtube_transcript_api._errors import (
+        TranscriptsDisabled,
+        NoTranscriptFound,
+        VideoUnavailable
+    )
+    YOUTUBE_API_AVAILABLE = True
+except ImportError:
+    YOUTUBE_API_AVAILABLE = False
+    print("⚠️ youtube-transcript-api not available")
 class YouTubeTool:
                 "video_id": None
             }
+        if not YOUTUBE_API_AVAILABLE:
+            return {
+                "success": False,
+                "error": "YouTube transcript API not available",
+                "transcript": "",
+                "segments": [],
+                "video_id": video_id
+            }
         try:
             # Try to get transcript (auto-generated or manual)
             transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
                     "video_id": video_id
                 }
+        except TranscriptsDisabled if YOUTUBE_API_AVAILABLE else Exception:
             return {
                 "success": False,
                 "error": "Transcripts are disabled for this video",
                 "segments": [],
                 "video_id": video_id
             }
+        except NoTranscriptFound if YOUTUBE_API_AVAILABLE else Exception:
             return {
                 "success": False,
                 "error": "No transcript found for this video",
                 "segments": [],
                 "video_id": video_id
             }
+        except VideoUnavailable if YOUTUBE_API_AVAILABLE else Exception:
             return {
                 "success": False,
                 "error": "Video is unavailable",
                 "video_id": video_id
             }
         except Exception as e:
+            error_msg = str(e)
+            # Check for network errors
+            if "NameResolutionError" in error_msg or "Failed to resolve" in error_msg:
+                return {
+                    "success": False,
+                    "error": "Network error: Cannot connect to YouTube (DNS resolution failed)",
+                    "transcript": "",
+                    "segments": [],
+                    "video_id": video_id,
+                    "network_error": True
+                }
             return {
                 "success": False,
+                "error": f"Error fetching transcript: {error_msg[:200]}",
                 "transcript": "",
                 "segments": [],
                 "video_id": video_id