Spaces:

Naveen-2007
/

perplexity-clone

Running

App Files Files Community

Naveen-2007 committed on Dec 7, 2025

Commit

b47dcd2

1 Parent(s): d76cab0

Add YouTube transcript extraction, improve Video Brain with follow-up questions, fix all modes

Browse files

Files changed (4) hide show

app/api.py +99 -71
requirements.txt +3 -0
streamlit_app.py +35 -0
tools/youtube_tool.py +150 -0

app/api.py CHANGED Viewed

@@ -1197,8 +1197,14 @@ Be detailed, practical, and use real-world best practices. Make it production-re
 # =======================================================
-# VIDEO BRAIN ENDPOINT - YouTube Video Analysis
 # =======================================================
 class VideoBrainRequest(BaseModel):
     message: str
     workspace_id: str = "default"
@@ -1209,8 +1215,8 @@ class VideoBrainRequest(BaseModel):
 @app.post("/api/video_brain", response_model=ChatResponse)
 def video_brain_mode(req: VideoBrainRequest):
     """
-    Video Brain Mode - Analyzes YouTube videos and answers questions about them.
-    Extracts transcript/content and provides intelligent responses.
     """
     q = req.message.strip()
     ws = req.workspace_id
@@ -1222,7 +1228,7 @@ def video_brain_mode(req: VideoBrainRequest):
     if not youtube_url:
         return ChatResponse(
-            answer="⚠️ Please provide a YouTube URL first. Enter the URL in the Video Brain interface and click 'Load' before asking questions.",
             sources=[],
             links=[],
             images=[],
@@ -1231,83 +1237,105 @@ def video_brain_mode(req: VideoBrainRequest):
             workspace_id=ws
         )
-    # Try to get video information
-    video_content = ""
-    video_title = ""
-    try:
-        # Extract video ID
-        video_id = ""
-        if "v=" in youtube_url:
-            video_id = youtube_url.split("v=")[1].split("&")[0]
-        elif "youtu.be/" in youtube_url:
-            video_id = youtube_url.split("youtu.be/")[1].split("?")[0]
-        print(f"  🔍 Video ID: {video_id}")
-        # Search for video information and related content
-        if video_id:
-            # Search for the video title and description
-            topic_results = search_tool.search(f"youtube {video_id}", num_results=3)
-            if topic_results:
-                for r in topic_results:
-                    title = r.get("title", "")
-                    if title and not video_title:
-                        video_title = title
-                    snippet = r.get("content", "") or r.get("snippet", "")
-                    if snippet:
-                        video_content += snippet + "\n"
-        # Search for transcript or summary
-        search_query = f"youtube video transcript summary {video_title or video_id}"
-        results = search_tool.search(search_query, num_results=3)
-        for r in results[:2]:
-            url = r.get("url", "")
-            if url and "youtube.com" not in url:  # Skip YouTube pages, get transcripts
-                text = browse_tool.fetch_clean(url)
-                if text:
-                    video_content += text[:2000] + "\n\n"
-        print(f"  📝 Content gathered: {len(video_content)} chars")
-    except Exception as e:
-        print(f"  ❌ Video content fetch error: {e}")
-    prompt = f"""You are VIDEO BRAIN AI - an expert at analyzing and explaining YouTube video content.
-VIDEO URL: {youtube_url}
-{f"VIDEO TITLE: {video_title}" if video_title else ""}
-{f"AVAILABLE VIDEO CONTEXT:{chr(10)}{video_content[:4000]}" if video_content else "Note: Could not fetch video transcript directly. I will provide helpful guidance based on the question and general knowledge."}
-USER QUESTION: {q}
-Instructions:
-1. If context is available, answer based on the video content
-2. If the question is about summarizing, provide key points and takeaways
-3. If asking about specific topics, explain them clearly
-4. Use timestamps if available (e.g., "At around 5:30...")
-5. If limited information is available, be honest but still provide helpful guidance
-6. Format your response with headers and bullet points for clarity
-7. Make the response educational and easy to understand
-Provide a comprehensive, helpful response:"""
-    msgs = build_context(ws, prompt)
-    answer = llm.invoke(msgs).content
-    # Generate follow-up questions about the video
-    follow = [
-        "Summarize the main points of this video",
-        "What are the key takeaways?",
-        "Explain the most important concept covered",
-        "What questions should I ask about this topic?",
-        "Create study notes from this video"
-    ]
-    sources = [{"title": f"🎥 {video_title or 'YouTube Video'}", "url": youtube_url}]
-    links = [{"title": video_title or "YouTube Video", "url": youtube_url, "snippet": "Source video"}]
     memory.add(ws, "assistant", answer)
     print(f"  ✅ Video Brain: Response generated")

 # =======================================================
+# VIDEO BRAIN ENDPOINT - YouTube Video Analysis with Transcript
 # =======================================================
+from tools.youtube_tool import YouTubeTool
+youtube_tool = YouTubeTool()
+# Store video transcripts in memory per workspace
+video_transcripts = {}
 class VideoBrainRequest(BaseModel):
     message: str
     workspace_id: str = "default"
 @app.post("/api/video_brain", response_model=ChatResponse)
 def video_brain_mode(req: VideoBrainRequest):
     """
+    Video Brain Mode - Analyzes YouTube videos using actual transcripts.
+    Extracts real transcript and provides accurate responses.
     """
     q = req.message.strip()
     ws = req.workspace_id
     if not youtube_url:
         return ChatResponse(
+            answer="⚠️ Please provide a YouTube URL first. Paste the URL in the search box or load it above.",
             sources=[],
             links=[],
             images=[],
             workspace_id=ws
         )
+    # Check if we already have transcript for this video
+    video_id = youtube_tool.extract_video_id(youtube_url)
+    cache_key = f"{ws}_{video_id}"
+    transcript_data = None
+    if cache_key in video_transcripts:
+        transcript_data = video_transcripts[cache_key]
+        print(f"  📋 Using cached transcript for {video_id}")
+    else:
+        # Fetch new transcript
+        print(f"  🔄 Fetching transcript for video: {video_id}")
+        transcript_data = youtube_tool.get_transcript(youtube_url)
+        if transcript_data["success"]:
+            video_transcripts[cache_key] = transcript_data
+            print(f"  ✅ Transcript fetched: {len(transcript_data.get('transcript', ''))} chars")
+        else:
+            print(f"  ⚠️ Transcript fetch failed: {transcript_data.get('error')}")
+    # Build context for LLM
+    transcript_text = ""
+    if transcript_data and transcript_data.get("success"):
+        # Use the timestamped transcript (the "transcript" key) as LLM context
+        transcript_text = transcript_data.get("transcript", "")[:8000]  # Limit for context window
+    # Generate appropriate prompt based on question type
+    q_lower = q.lower()
+    is_summary = any(word in q_lower for word in ["summarize", "summary", "overview", "main points", "key takeaways", "what is this about"])
+    if transcript_text:
+        if is_summary:
+            prompt = f"""You are VIDEO BRAIN AI - an expert at analyzing YouTube videos.
+VIDEO TRANSCRIPT (with timestamps [MM:SS]):
+{transcript_text}
+USER REQUEST: {q}
+Provide a comprehensive summary with:
+1. **Overview**: One paragraph describing what the video is about
+2. **Key Points**: 5-7 main takeaways with timestamps
+3. **Important Details**: Any specific facts, figures, or examples mentioned
+4. **Actionable Insights**: What viewers should do or remember
+Use the actual content from the transcript. Reference timestamps like [5:30] when citing specific parts."""
+        else:
+            prompt = f"""You are VIDEO BRAIN AI - an expert at analyzing YouTube videos.
+VIDEO TRANSCRIPT (with timestamps [MM:SS]):
+{transcript_text}
+USER QUESTION: {q}
+Answer the question using ONLY the information from the transcript above.
+- Be specific and cite timestamps when relevant
+- If the answer is not in the transcript, say so honestly
+- Format your response clearly with bullet points if appropriate"""
+    else:
+        # No transcript available - explain why
+        error_msg = transcript_data.get("error", "Unknown error") if transcript_data else "Could not fetch transcript"
+        prompt = f"""The user asked about a YouTube video but I couldn't extract the transcript.
+Video URL: {youtube_url}
+Error: {error_msg}
+User Question: {q}
+Explain that:
+1. The transcript couldn't be fetched (reason: {error_msg})
+2. Suggest they try:
+   - A different video that has captions/subtitles enabled
+   - Checking if the video is public and available
+   - Using YouTube's built-in transcript feature (click ... > Show transcript)
+3. Offer to help once they can provide the transcript text manually"""
+    try:
+        msgs = build_context(ws, prompt)
+        answer = llm.invoke(msgs).content
+    except Exception as e:
+        print(f"  ❌ LLM error: {e}")
+        answer = f"Error generating response: {str(e)[:100]}"
+    # Generate contextual follow-up questions
+    if transcript_text:
+        follow = [
+            "What are the main arguments or points made?",
+            "Summarize this in 3 bullet points",
+            "What examples or case studies are mentioned?",
+            "What should I learn from this video?",
+            "Explain the most complex concept in simple terms"
+        ]
+    else:
+        follow = [
+            "Try a different YouTube video",
+            "How do I enable captions on YouTube?",
+            "What videos work best with Video Brain?"
+        ]
+    # Build sources
+    sources = [{"title": f"🎥 YouTube Video", "url": youtube_url}]
+    links = [{"title": "Source Video", "url": youtube_url, "snippet": f"Video ID: {video_id}"}]
     memory.add(ws, "assistant", answer)
     print(f"  ✅ Video Brain: Response generated")

requirements.txt CHANGED Viewed

@@ -44,6 +44,9 @@ trafilatura==1.6.3
 # Wikipedia
 wikipedia==1.4.0
 # Document processing
 pypdf==4.0.1
 python-pptx==0.6.23

 # Wikipedia
 wikipedia==1.4.0
+# YouTube transcript extraction
+youtube-transcript-api==0.6.2
 # Document processing
 pypdf==4.0.1
 python-pptx==0.6.23

streamlit_app.py CHANGED Viewed

@@ -801,6 +801,16 @@ if st.session_state.current_result:
             <h4 style="margin: 0;">🎥 Video Analysis</h4>
         </div>
         """, unsafe_allow_html=True)
     # Query box
     mode_info = MODES.get(result['mode'], MODES['Automatic'])
@@ -829,6 +839,31 @@ if st.session_state.current_result:
         # Display answer directly with markdown
         st.markdown(answer)
         followups = data.get("followups", [])
         if followups:
             st.markdown("**Related:**")

             <h4 style="margin: 0;">🎥 Video Analysis</h4>
         </div>
         """, unsafe_allow_html=True)
+        # Show embedded video
+        if st.session_state.youtube_url:
+            video_id = ""
+            if "v=" in st.session_state.youtube_url:
+                video_id = st.session_state.youtube_url.split("v=")[1].split("&")[0]
+            elif "youtu.be/" in st.session_state.youtube_url:
+                video_id = st.session_state.youtube_url.split("youtu.be/")[1].split("?")[0]
+            if video_id:
+                st.video(f"https://www.youtube.com/watch?v={video_id}")
     # Query box
     mode_info = MODES.get(result['mode'], MODES['Automatic'])
         # Display answer directly with markdown
         st.markdown(answer)
+        # For Video Brain mode, show a follow-up question input
+        if result['mode'] == "Video Brain" and st.session_state.youtube_url:
+            st.divider()
+            st.markdown("**💬 Ask another question about this video:**")
+            followup_question = st.text_input(
+                "Follow-up question",
+                placeholder="e.g., What are the main arguments? Explain the key concept...",
+                key="video_followup_input",
+                label_visibility="collapsed"
+            )
+            if st.button("Ask", key="video_followup_btn"):
+                if followup_question.strip():
+                    with st.spinner("Analyzing..."):
+                        new_result = call_api(
+                            followup_question.strip(),
+                            "Video Brain",
+                            {"youtube_url": st.session_state.youtube_url}
+                        )
+                        st.session_state.current_result = {
+                            "query": followup_question.strip(),
+                            "mode": "Video Brain",
+                            "data": new_result
+                        }
+                    st.rerun()
         followups = data.get("followups", [])
         if followups:
             st.markdown("**Related:**")

tools/youtube_tool.py ADDED Viewed

	@@ -0,0 +1,150 @@

+# tools/youtube_tool.py
+"""
+YouTube Transcript Extraction Tool
+Extracts transcripts from YouTube videos for Video Brain mode.
+"""
+import re
+from typing import Dict, Optional
+from youtube_transcript_api import YouTubeTranscriptApi
+from youtube_transcript_api._errors import (
+    TranscriptsDisabled,
+    NoTranscriptFound,
+    VideoUnavailable
+)
class YouTubeTool:
    """Extract transcripts and basic metadata from YouTube videos.

    The tool keeps no per-video state; each method derives its result from
    the URL (or bare video ID) it is given.
    """

    # URL shapes in which the 11-character video ID directly follows a marker.
    _URL_ID_PATTERN = (
        r'(?:v=|/v/|youtu\.be/|/embed/|/shorts/|/live/)([a-zA-Z0-9_-]{11})'
    )
    # A video ID supplied on its own, with no URL around it.
    _BARE_ID_PATTERN = r'[a-zA-Z0-9_-]{11}'
    # Language codes tried first when picking a transcript.
    _PREFERRED_LANGUAGES = ['en', 'en-US', 'en-GB']

    def extract_video_id(self, url: str) -> Optional[str]:
        """Extract the video ID from a YouTube URL or a bare video ID.

        Args:
            url: A watch / short-link / embed / shorts / live URL, or the
                11-character video ID by itself.

        Returns:
            The 11-character video ID, or None when the input is not a
            string, is empty, or contains no recognisable ID.
        """
        if not isinstance(url, str):
            return None
        candidate = url.strip()
        if not candidate:
            return None

        match = re.search(self._URL_ID_PATTERN, candidate)
        if match:
            return match.group(1)

        # Only accept a bare ID when it is the WHOLE input. Searching for any
        # 11-character run would turn arbitrary URLs into bogus "video IDs".
        if re.fullmatch(self._BARE_ID_PATTERN, candidate):
            return candidate
        return None

    @staticmethod
    def _failure(error: str, video_id: Optional[str]) -> Dict:
        """Build the failure result, with the same keys as a success result."""
        return {
            "success": False,
            "error": error,
            "transcript": "",
            "clean_transcript": "",
            "segments": [],
            "video_id": video_id,
        }

    @staticmethod
    def _format_timestamp(start_seconds) -> str:
        """Render a segment start offset (seconds) as ``[M:SS]``."""
        total = int(start_seconds)
        return f"[{total // 60}:{total % 60:02d}]"

    def _select_transcript(self, transcript_list):
        """Pick the best transcript: English, else translated, else any.

        Returns None when ``transcript_list`` yields no transcripts at all.
        """
        try:
            return transcript_list.find_transcript(self._PREFERRED_LANGUAGES)
        except Exception:
            # Deliberate best-effort: no English transcript, so fall through
            # to translation. Exception (not a bare except) so that
            # KeyboardInterrupt / SystemExit still propagate.
            pass

        candidates = list(transcript_list)
        for candidate in candidates:
            try:
                return candidate.translate('en')
            except Exception:
                # This transcript cannot be translated; try the next one.
                continue

        # Nothing could be translated: use whatever language is available.
        return candidates[0] if candidates else None

    def get_transcript(self, video_url: str) -> Dict:
        """Get the transcript of a YouTube video.

        Args:
            video_url: YouTube URL or bare video ID.

        Returns:
            Dict with keys:
            - success: bool
            - transcript: str, one ``[M:SS] text`` line per segment
            - clean_transcript: str, segment texts joined by spaces
            - segments: the segment list returned by the transcript API
              (each entry is read here via its 'start' and 'text' keys)
            - video_id: str, or None when no ID could be extracted
            - error: str describing the failure, or None on success

        API errors are never raised to the caller; they are reported through
        ``success`` / ``error``.
        """
        video_id = self.extract_video_id(video_url)
        if not video_id:
            return self._failure("Could not extract video ID from URL", None)

        try:
            transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
            transcript = self._select_transcript(transcript_list)
            if transcript is None:
                return self._failure(
                    "No transcript available for this video", video_id
                )

            segments = transcript.fetch()
            full_transcript = "\n".join(
                f"{self._format_timestamp(seg['start'])} {seg['text']}"
                for seg in segments
            )
            clean_text = " ".join(seg['text'] for seg in segments)
            return {
                "success": True,
                "transcript": full_transcript,
                "clean_transcript": clean_text,
                "segments": segments,
                "video_id": video_id,
                "error": None,
            }
        except TranscriptsDisabled:
            return self._failure(
                "Transcripts are disabled for this video", video_id
            )
        except NoTranscriptFound:
            return self._failure("No transcript found for this video", video_id)
        except VideoUnavailable:
            return self._failure("Video is unavailable", video_id)
        except Exception as e:
            # Boundary handler: callers expect a result dict, never a raise.
            return self._failure(f"Error fetching transcript: {str(e)}", video_id)

    def get_video_info(self, video_url: str) -> Dict:
        """Return the video ID, the original URL and an embed URL.

        No network request is made; everything is derived from ``video_url``.
        ``embed_url`` is None when no video ID can be extracted.
        """
        video_id = self.extract_video_id(video_url)
        return {
            "video_id": video_id,
            "url": video_url,
            "embed_url": f"https://www.youtube.com/embed/{video_id}" if video_id else None
        }