Naveen-2007 committed on
Commit
b47dcd2
·
1 Parent(s): d76cab0

Add YouTube transcript extraction, improve Video Brain with follow-up questions, fix all modes

Browse files
Files changed (4) hide show
  1. app/api.py +99 -71
  2. requirements.txt +3 -0
  3. streamlit_app.py +35 -0
  4. tools/youtube_tool.py +150 -0
app/api.py CHANGED
@@ -1197,8 +1197,14 @@ Be detailed, practical, and use real-world best practices. Make it production-re
1197
 
1198
 
1199
  # =======================================================
1200
- # VIDEO BRAIN ENDPOINT - YouTube Video Analysis
1201
  # =======================================================
 
 
 
 
 
 
1202
  class VideoBrainRequest(BaseModel):
1203
  message: str
1204
  workspace_id: str = "default"
@@ -1209,8 +1215,8 @@ class VideoBrainRequest(BaseModel):
1209
  @app.post("/api/video_brain", response_model=ChatResponse)
1210
  def video_brain_mode(req: VideoBrainRequest):
1211
  """
1212
- Video Brain Mode - Analyzes YouTube videos and answers questions about them.
1213
- Extracts transcript/content and provides intelligent responses.
1214
  """
1215
  q = req.message.strip()
1216
  ws = req.workspace_id
@@ -1222,7 +1228,7 @@ def video_brain_mode(req: VideoBrainRequest):
1222
 
1223
  if not youtube_url:
1224
  return ChatResponse(
1225
- answer="⚠️ Please provide a YouTube URL first. Enter the URL in the Video Brain interface and click 'Load' before asking questions.",
1226
  sources=[],
1227
  links=[],
1228
  images=[],
@@ -1231,83 +1237,105 @@ def video_brain_mode(req: VideoBrainRequest):
1231
  workspace_id=ws
1232
  )
1233
 
1234
- # Try to get video information
1235
- video_content = ""
1236
- video_title = ""
1237
 
1238
- try:
1239
- # Extract video ID
1240
- video_id = ""
1241
- if "v=" in youtube_url:
1242
- video_id = youtube_url.split("v=")[1].split("&")[0]
1243
- elif "youtu.be/" in youtube_url:
1244
- video_id = youtube_url.split("youtu.be/")[1].split("?")[0]
1245
-
1246
- print(f" 🔍 Video ID: {video_id}")
1247
-
1248
- # Search for video information and related content
1249
- if video_id:
1250
- # Search for the video title and description
1251
- topic_results = search_tool.search(f"youtube {video_id}", num_results=3)
1252
- if topic_results:
1253
- for r in topic_results:
1254
- title = r.get("title", "")
1255
- if title and not video_title:
1256
- video_title = title
1257
- snippet = r.get("content", "") or r.get("snippet", "")
1258
- if snippet:
1259
- video_content += snippet + "\n"
1260
-
1261
- # Search for transcript or summary
1262
- search_query = f"youtube video transcript summary {video_title or video_id}"
1263
- results = search_tool.search(search_query, num_results=3)
1264
-
1265
- for r in results[:2]:
1266
- url = r.get("url", "")
1267
- if url and "youtube.com" not in url: # Skip YouTube pages, get transcripts
1268
- text = browse_tool.fetch_clean(url)
1269
- if text:
1270
- video_content += text[:2000] + "\n\n"
1271
-
1272
- print(f" 📝 Content gathered: {len(video_content)} chars")
1273
-
1274
- except Exception as e:
1275
- print(f" ❌ Video content fetch error: {e}")
1276
 
1277
- prompt = f"""You are VIDEO BRAIN AI - an expert at analyzing and explaining YouTube video content.
 
 
 
 
 
 
1278
 
1279
- VIDEO URL: {youtube_url}
1280
- {f"VIDEO TITLE: {video_title}" if video_title else ""}
1281
 
1282
- {f"AVAILABLE VIDEO CONTEXT:{chr(10)}{video_content[:4000]}" if video_content else "Note: Could not fetch video transcript directly. I will provide helpful guidance based on the question and general knowledge."}
1283
 
1284
- USER QUESTION: {q}
 
 
 
 
1285
 
1286
- Instructions:
1287
- 1. If context is available, answer based on the video content
1288
- 2. If the question is about summarizing, provide key points and takeaways
1289
- 3. If asking about specific topics, explain them clearly
1290
- 4. Use timestamps if available (e.g., "At around 5:30...")
1291
- 5. If limited information is available, be honest but still provide helpful guidance
1292
- 6. Format your response with headers and bullet points for clarity
1293
- 7. Make the response educational and easy to understand
1294
 
1295
- Provide a comprehensive, helpful response:"""
 
1296
 
1297
- msgs = build_context(ws, prompt)
1298
- answer = llm.invoke(msgs).content
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1299
 
1300
- # Generate follow-up questions about the video
1301
- follow = [
1302
- "Summarize the main points of this video",
1303
- "What are the key takeaways?",
1304
- "Explain the most important concept covered",
1305
- "What questions should I ask about this topic?",
1306
- "Create study notes from this video"
1307
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1308
 
1309
- sources = [{"title": f"🎥 {video_title or 'YouTube Video'}", "url": youtube_url}]
1310
- links = [{"title": video_title or "YouTube Video", "url": youtube_url, "snippet": "Source video"}]
 
1311
 
1312
  memory.add(ws, "assistant", answer)
1313
  print(f" ✅ Video Brain: Response generated")
 
1197
 
1198
 
1199
  # =======================================================
1200
+ # VIDEO BRAIN ENDPOINT - YouTube Video Analysis with Transcript
1201
  # =======================================================
1202
+ from tools.youtube_tool import YouTubeTool
1203
+ youtube_tool = YouTubeTool()
1204
+
1205
+ # Store video transcripts in memory per workspace
1206
+ video_transcripts = {}
1207
+
1208
  class VideoBrainRequest(BaseModel):
1209
  message: str
1210
  workspace_id: str = "default"
 
1215
  @app.post("/api/video_brain", response_model=ChatResponse)
1216
  def video_brain_mode(req: VideoBrainRequest):
1217
  """
1218
+ Video Brain Mode - Analyzes YouTube videos using actual transcripts.
1219
+ Extracts real transcript and provides accurate responses.
1220
  """
1221
  q = req.message.strip()
1222
  ws = req.workspace_id
 
1228
 
1229
  if not youtube_url:
1230
  return ChatResponse(
1231
+ answer="⚠️ Please provide a YouTube URL first. Paste the URL in the search box or load it above.",
1232
  sources=[],
1233
  links=[],
1234
  images=[],
 
1237
  workspace_id=ws
1238
  )
1239
 
1240
+ # Check if we already have transcript for this video
1241
+ video_id = youtube_tool.extract_video_id(youtube_url)
1242
+ cache_key = f"{ws}_{video_id}"
1243
 
1244
+ transcript_data = None
1245
+ if cache_key in video_transcripts:
1246
+ transcript_data = video_transcripts[cache_key]
1247
+ print(f" 📋 Using cached transcript for {video_id}")
1248
+ else:
1249
+ # Fetch new transcript
1250
+ print(f" 🔄 Fetching transcript for video: {video_id}")
1251
+ transcript_data = youtube_tool.get_transcript(youtube_url)
1252
+ if transcript_data["success"]:
1253
+ video_transcripts[cache_key] = transcript_data
1254
+ print(f" ✅ Transcript fetched: {len(transcript_data.get('transcript', ''))} chars")
1255
+ else:
1256
+ print(f" ⚠️ Transcript fetch failed: {transcript_data.get('error')}")
1257
+
1258
+ # Build context for LLM
1259
+ transcript_text = ""
1260
+ if transcript_data and transcript_data.get("success"):
1261
+ # Use clean transcript for context (with timestamps)
1262
+ transcript_text = transcript_data.get("transcript", "")[:8000] # Limit for context window
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1263
 
1264
+ # Generate appropriate prompt based on question type
1265
+ q_lower = q.lower()
1266
+ is_summary = any(word in q_lower for word in ["summarize", "summary", "overview", "main points", "key takeaways", "what is this about"])
1267
+
1268
+ if transcript_text:
1269
+ if is_summary:
1270
+ prompt = f"""You are VIDEO BRAIN AI - an expert at analyzing YouTube videos.
1271
 
1272
+ VIDEO TRANSCRIPT (with timestamps [MM:SS]):
1273
+ {transcript_text}
1274
 
1275
+ USER REQUEST: {q}
1276
 
1277
+ Provide a comprehensive summary with:
1278
+ 1. **Overview**: One paragraph describing what the video is about
1279
+ 2. **Key Points**: 5-7 main takeaways with timestamps
1280
+ 3. **Important Details**: Any specific facts, figures, or examples mentioned
1281
+ 4. **Actionable Insights**: What viewers should do or remember
1282
 
1283
+ Use the actual content from the transcript. Reference timestamps like [5:30] when citing specific parts."""
1284
+ else:
1285
+ prompt = f"""You are VIDEO BRAIN AI - an expert at analyzing YouTube videos.
 
 
 
 
 
1286
 
1287
+ VIDEO TRANSCRIPT (with timestamps [MM:SS]):
1288
+ {transcript_text}
1289
 
1290
+ USER QUESTION: {q}
1291
+
1292
+ Answer the question using ONLY the information from the transcript above.
1293
+ - Be specific and cite timestamps when relevant
1294
+ - If the answer is not in the transcript, say so honestly
1295
+ - Format your response clearly with bullet points if appropriate"""
1296
+ else:
1297
+ # No transcript available - explain why
1298
+ error_msg = transcript_data.get("error", "Unknown error") if transcript_data else "Could not fetch transcript"
1299
+ prompt = f"""The user asked about a YouTube video but I couldn't extract the transcript.
1300
+
1301
+ Video URL: {youtube_url}
1302
+ Error: {error_msg}
1303
+ User Question: {q}
1304
+
1305
+ Explain that:
1306
+ 1. The transcript couldn't be fetched (reason: {error_msg})
1307
+ 2. Suggest they try:
1308
+ - A different video that has captions/subtitles enabled
1309
+ - Checking if the video is public and available
1310
+ - Using YouTube's built-in transcript feature (click ... > Show transcript)
1311
+ 3. Offer to help once they can provide the transcript text manually"""
1312
 
1313
+ try:
1314
+ msgs = build_context(ws, prompt)
1315
+ answer = llm.invoke(msgs).content
1316
+ except Exception as e:
1317
+ print(f" LLM error: {e}")
1318
+ answer = f"Error generating response: {str(e)[:100]}"
1319
+
1320
+ # Generate contextual follow-up questions
1321
+ if transcript_text:
1322
+ follow = [
1323
+ "What are the main arguments or points made?",
1324
+ "Summarize this in 3 bullet points",
1325
+ "What examples or case studies are mentioned?",
1326
+ "What should I learn from this video?",
1327
+ "Explain the most complex concept in simple terms"
1328
+ ]
1329
+ else:
1330
+ follow = [
1331
+ "Try a different YouTube video",
1332
+ "How do I enable captions on YouTube?",
1333
+ "What videos work best with Video Brain?"
1334
+ ]
1335
 
1336
+ # Build sources
1337
+ sources = [{"title": f"🎥 YouTube Video", "url": youtube_url}]
1338
+ links = [{"title": "Source Video", "url": youtube_url, "snippet": f"Video ID: {video_id}"}]
1339
 
1340
  memory.add(ws, "assistant", answer)
1341
  print(f" ✅ Video Brain: Response generated")
requirements.txt CHANGED
@@ -44,6 +44,9 @@ trafilatura==1.6.3
44
  # Wikipedia
45
  wikipedia==1.4.0
46
 
 
 
 
47
  # Document processing
48
  pypdf==4.0.1
49
  python-pptx==0.6.23
 
44
  # Wikipedia
45
  wikipedia==1.4.0
46
 
47
+ # YouTube transcript extraction
48
+ youtube-transcript-api==0.6.2
49
+
50
  # Document processing
51
  pypdf==4.0.1
52
  python-pptx==0.6.23
streamlit_app.py CHANGED
@@ -801,6 +801,16 @@ if st.session_state.current_result:
801
  <h4 style="margin: 0;">🎥 Video Analysis</h4>
802
  </div>
803
  """, unsafe_allow_html=True)
 
 
 
 
 
 
 
 
 
 
804
 
805
  # Query box
806
  mode_info = MODES.get(result['mode'], MODES['Automatic'])
@@ -829,6 +839,31 @@ if st.session_state.current_result:
829
  # Display answer directly with markdown
830
  st.markdown(answer)
831
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
832
  followups = data.get("followups", [])
833
  if followups:
834
  st.markdown("**Related:**")
 
801
  <h4 style="margin: 0;">🎥 Video Analysis</h4>
802
  </div>
803
  """, unsafe_allow_html=True)
804
+
805
+ # Show embedded video
806
+ if st.session_state.youtube_url:
807
+ video_id = ""
808
+ if "v=" in st.session_state.youtube_url:
809
+ video_id = st.session_state.youtube_url.split("v=")[1].split("&")[0]
810
+ elif "youtu.be/" in st.session_state.youtube_url:
811
+ video_id = st.session_state.youtube_url.split("youtu.be/")[1].split("?")[0]
812
+ if video_id:
813
+ st.video(f"https://www.youtube.com/watch?v={video_id}")
814
 
815
  # Query box
816
  mode_info = MODES.get(result['mode'], MODES['Automatic'])
 
839
  # Display answer directly with markdown
840
  st.markdown(answer)
841
 
842
+ # For Video Brain mode, show a follow-up question input
843
+ if result['mode'] == "Video Brain" and st.session_state.youtube_url:
844
+ st.divider()
845
+ st.markdown("**💬 Ask another question about this video:**")
846
+ followup_question = st.text_input(
847
+ "Follow-up question",
848
+ placeholder="e.g., What are the main arguments? Explain the key concept...",
849
+ key="video_followup_input",
850
+ label_visibility="collapsed"
851
+ )
852
+ if st.button("Ask", key="video_followup_btn"):
853
+ if followup_question.strip():
854
+ with st.spinner("Analyzing..."):
855
+ new_result = call_api(
856
+ followup_question.strip(),
857
+ "Video Brain",
858
+ {"youtube_url": st.session_state.youtube_url}
859
+ )
860
+ st.session_state.current_result = {
861
+ "query": followup_question.strip(),
862
+ "mode": "Video Brain",
863
+ "data": new_result
864
+ }
865
+ st.rerun()
866
+
867
  followups = data.get("followups", [])
868
  if followups:
869
  st.markdown("**Related:**")
tools/youtube_tool.py ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # tools/youtube_tool.py
2
+ """
3
+ YouTube Transcript Extraction Tool
4
+ Extracts transcripts from YouTube videos for Video Brain mode.
5
+ """
6
+
7
+ import re
8
+ from typing import Dict, Optional
9
+ from youtube_transcript_api import YouTubeTranscriptApi
10
+ from youtube_transcript_api._errors import (
11
+ TranscriptsDisabled,
12
+ NoTranscriptFound,
13
+ VideoUnavailable
14
+ )
15
+
16
+
17
class YouTubeTool:
    """Extract transcripts and basic metadata from YouTube videos.

    Every public method reports problems through its return value rather
    than by raising, so callers can branch on ``result["success"]``.
    """

    # URL shapes where the 11-character video ID directly follows a known
    # marker (watch?v=, /v/, youtu.be/, /embed/, /shorts/, /live/).
    _URL_ID_PATTERN = re.compile(
        r'(?:v=|/v/|youtu\.be/|/embed/|/shorts/|/live/)([a-zA-Z0-9_-]{11})'
    )
    # A bare video ID supplied instead of a URL. It is matched against the
    # WHOLE input: an unanchored search would pick up any 11-character run
    # (for example the start of a playlist ID) and report it as a video ID.
    _BARE_ID_PATTERN = re.compile(r'[a-zA-Z0-9_-]{11}')

    def extract_video_id(self, url: str) -> Optional[str]:
        """Return the 11-character video ID found in *url*, or ``None``.

        Args:
            url: A YouTube URL (watch, short-link, embed, shorts or live
                form) or a bare 11-character video ID.

        Returns:
            The video ID, or ``None`` when *url* is empty/``None`` or
            contains no recognisable ID.
        """
        if not url:
            return None

        match = self._URL_ID_PATTERN.search(url)
        if match:
            return match.group(1)

        candidate = url.strip()
        if self._BARE_ID_PATTERN.fullmatch(candidate):
            return candidate
        return None

    @staticmethod
    def _failure(video_id: Optional[str], error: str) -> Dict:
        """Build the result dict returned for every failure path."""
        return {
            "success": False,
            "error": error,
            "transcript": "",
            # Present on success too, so both outcomes share one schema.
            "clean_transcript": "",
            "segments": [],
            "video_id": video_id,
        }

    @staticmethod
    def _format_timestamp(start_seconds: float) -> str:
        """Render a segment start offset (seconds) as ``[M:SS]``."""
        minutes, seconds = divmod(int(start_seconds), 60)
        return f"[{minutes}:{seconds:02d}]"

    @staticmethod
    def _select_transcript(transcript_list):
        """Pick the best transcript: English, else translated, else any.

        Returns ``None`` when *transcript_list* yields no transcripts.
        """
        try:
            return transcript_list.find_transcript(['en', 'en-US', 'en-GB'])
        except NoTranscriptFound:
            # No English track; fall through to the translation fallback.
            pass

        available = list(transcript_list)
        for candidate in available:
            try:
                return candidate.translate('en')
            except Exception:
                # Best effort: this track cannot be translated to English
                # (presumably not translatable or 'en' not offered), so
                # try the next one.
                continue

        # Nothing could be translated: use the first transcript as-is.
        return available[0] if available else None

    def get_transcript(self, video_url: str) -> Dict:
        """Fetch the transcript of a YouTube video.

        Args:
            video_url: A YouTube URL or bare video ID.

        Returns:
            Dict with keys:
                - success: bool
                - transcript: str, one ``[M:SS] text`` line per segment
                - clean_transcript: str, segment texts joined by spaces
                - segments: the segment sequence returned by ``fetch()``
                  (items are read via ``seg['start']`` / ``seg['text']``)
                - video_id: str, or None if it could not be extracted
                - error: str describing the failure, or None on success
        """
        video_id = self.extract_video_id(video_url)
        if not video_id:
            return self._failure(None, "Could not extract video ID from URL")

        try:
            transcript = self._select_transcript(
                YouTubeTranscriptApi.list_transcripts(video_id)
            )
            if transcript is None:
                return self._failure(
                    video_id, "No transcript available for this video"
                )

            segments = transcript.fetch()

            full_transcript = "\n".join(
                f"{self._format_timestamp(seg['start'])} {seg['text']}"
                for seg in segments
            )
            clean_text = " ".join(seg['text'] for seg in segments)

            return {
                "success": True,
                "transcript": full_transcript,
                "clean_transcript": clean_text,
                "segments": segments,
                "video_id": video_id,
                "error": None,
            }
        except TranscriptsDisabled:
            return self._failure(
                video_id, "Transcripts are disabled for this video"
            )
        except NoTranscriptFound:
            return self._failure(
                video_id, "No transcript found for this video"
            )
        except VideoUnavailable:
            return self._failure(video_id, "Video is unavailable")
        except Exception as e:
            # Boundary handler: this method never raises, so any other
            # failure is reported through the error field.
            return self._failure(
                video_id, f"Error fetching transcript: {str(e)}"
            )

    def get_video_info(self, video_url: str) -> Dict:
        """Return the video ID, original URL and embed URL for *video_url*.

        No network request is made; everything is derived from the URL.
        ``video_id`` and ``embed_url`` are ``None`` when no ID can be
        extracted.
        """
        video_id = self.extract_video_id(video_url)
        return {
            "video_id": video_id,
            "url": video_url,
            "embed_url": f"https://www.youtube.com/embed/{video_id}" if video_id else None,
        }