Spaces:
Running
Running
Commit ·
b47dcd2
1
Parent(s): d76cab0
Add YouTube transcript extraction, improve Video Brain with follow-up questions, fix all modes
Browse files- app/api.py +99 -71
- requirements.txt +3 -0
- streamlit_app.py +35 -0
- tools/youtube_tool.py +150 -0
app/api.py
CHANGED
|
@@ -1197,8 +1197,14 @@ Be detailed, practical, and use real-world best practices. Make it production-re
|
|
| 1197 |
|
| 1198 |
|
| 1199 |
# =======================================================
|
| 1200 |
-
# VIDEO BRAIN ENDPOINT - YouTube Video Analysis
|
| 1201 |
# =======================================================
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1202 |
class VideoBrainRequest(BaseModel):
|
| 1203 |
message: str
|
| 1204 |
workspace_id: str = "default"
|
|
@@ -1209,8 +1215,8 @@ class VideoBrainRequest(BaseModel):
|
|
| 1209 |
@app.post("/api/video_brain", response_model=ChatResponse)
|
| 1210 |
def video_brain_mode(req: VideoBrainRequest):
|
| 1211 |
"""
|
| 1212 |
-
Video Brain Mode - Analyzes YouTube videos
|
| 1213 |
-
Extracts transcript
|
| 1214 |
"""
|
| 1215 |
q = req.message.strip()
|
| 1216 |
ws = req.workspace_id
|
|
@@ -1222,7 +1228,7 @@ def video_brain_mode(req: VideoBrainRequest):
|
|
| 1222 |
|
| 1223 |
if not youtube_url:
|
| 1224 |
return ChatResponse(
|
| 1225 |
-
answer="⚠️ Please provide a YouTube URL first.
|
| 1226 |
sources=[],
|
| 1227 |
links=[],
|
| 1228 |
images=[],
|
|
@@ -1231,83 +1237,105 @@ def video_brain_mode(req: VideoBrainRequest):
|
|
| 1231 |
workspace_id=ws
|
| 1232 |
)
|
| 1233 |
|
| 1234 |
-
#
|
| 1235 |
-
|
| 1236 |
-
|
| 1237 |
|
| 1238 |
-
|
| 1239 |
-
|
| 1240 |
-
|
| 1241 |
-
|
| 1242 |
-
|
| 1243 |
-
|
| 1244 |
-
|
| 1245 |
-
|
| 1246 |
-
|
| 1247 |
-
|
| 1248 |
-
|
| 1249 |
-
|
| 1250 |
-
|
| 1251 |
-
|
| 1252 |
-
|
| 1253 |
-
|
| 1254 |
-
|
| 1255 |
-
|
| 1256 |
-
|
| 1257 |
-
snippet = r.get("content", "") or r.get("snippet", "")
|
| 1258 |
-
if snippet:
|
| 1259 |
-
video_content += snippet + "\n"
|
| 1260 |
-
|
| 1261 |
-
# Search for transcript or summary
|
| 1262 |
-
search_query = f"youtube video transcript summary {video_title or video_id}"
|
| 1263 |
-
results = search_tool.search(search_query, num_results=3)
|
| 1264 |
-
|
| 1265 |
-
for r in results[:2]:
|
| 1266 |
-
url = r.get("url", "")
|
| 1267 |
-
if url and "youtube.com" not in url: # Skip YouTube pages, get transcripts
|
| 1268 |
-
text = browse_tool.fetch_clean(url)
|
| 1269 |
-
if text:
|
| 1270 |
-
video_content += text[:2000] + "\n\n"
|
| 1271 |
-
|
| 1272 |
-
print(f" 📝 Content gathered: {len(video_content)} chars")
|
| 1273 |
-
|
| 1274 |
-
except Exception as e:
|
| 1275 |
-
print(f" ❌ Video content fetch error: {e}")
|
| 1276 |
|
| 1277 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1278 |
|
| 1279 |
-
VIDEO
|
| 1280 |
-
{
|
| 1281 |
|
| 1282 |
-
|
| 1283 |
|
| 1284 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1285 |
|
| 1286 |
-
|
| 1287 |
-
|
| 1288 |
-
|
| 1289 |
-
3. If asking about specific topics, explain them clearly
|
| 1290 |
-
4. Use timestamps if available (e.g., "At around 5:30...")
|
| 1291 |
-
5. If limited information is available, be honest but still provide helpful guidance
|
| 1292 |
-
6. Format your response with headers and bullet points for clarity
|
| 1293 |
-
7. Make the response educational and easy to understand
|
| 1294 |
|
| 1295 |
-
|
|
|
|
| 1296 |
|
| 1297 |
-
|
| 1298 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1299 |
|
| 1300 |
-
|
| 1301 |
-
|
| 1302 |
-
|
| 1303 |
-
|
| 1304 |
-
"
|
| 1305 |
-
|
| 1306 |
-
|
| 1307 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1308 |
|
| 1309 |
-
|
| 1310 |
-
|
|
|
|
| 1311 |
|
| 1312 |
memory.add(ws, "assistant", answer)
|
| 1313 |
print(f" ✅ Video Brain: Response generated")
|
|
|
|
| 1197 |
|
| 1198 |
|
| 1199 |
# =======================================================
|
| 1200 |
+
# VIDEO BRAIN ENDPOINT - YouTube Video Analysis with Transcript
|
| 1201 |
# =======================================================
|
| 1202 |
+
from tools.youtube_tool import YouTubeTool
|
| 1203 |
+
youtube_tool = YouTubeTool()
|
| 1204 |
+
|
| 1205 |
+
# Store video transcripts in memory per workspace
|
| 1206 |
+
video_transcripts = {}
|
| 1207 |
+
|
| 1208 |
class VideoBrainRequest(BaseModel):
|
| 1209 |
message: str
|
| 1210 |
workspace_id: str = "default"
|
|
|
|
| 1215 |
@app.post("/api/video_brain", response_model=ChatResponse)
|
| 1216 |
def video_brain_mode(req: VideoBrainRequest):
|
| 1217 |
"""
|
| 1218 |
+
Video Brain Mode - Analyzes YouTube videos using actual transcripts.
|
| 1219 |
+
Extracts real transcript and provides accurate responses.
|
| 1220 |
"""
|
| 1221 |
q = req.message.strip()
|
| 1222 |
ws = req.workspace_id
|
|
|
|
| 1228 |
|
| 1229 |
if not youtube_url:
|
| 1230 |
return ChatResponse(
|
| 1231 |
+
answer="⚠️ Please provide a YouTube URL first. Paste the URL in the search box or load it above.",
|
| 1232 |
sources=[],
|
| 1233 |
links=[],
|
| 1234 |
images=[],
|
|
|
|
| 1237 |
workspace_id=ws
|
| 1238 |
)
|
| 1239 |
|
| 1240 |
+
# Check if we already have transcript for this video
|
| 1241 |
+
video_id = youtube_tool.extract_video_id(youtube_url)
|
| 1242 |
+
cache_key = f"{ws}_{video_id}"
|
| 1243 |
|
| 1244 |
+
transcript_data = None
|
| 1245 |
+
if cache_key in video_transcripts:
|
| 1246 |
+
transcript_data = video_transcripts[cache_key]
|
| 1247 |
+
print(f" 📋 Using cached transcript for {video_id}")
|
| 1248 |
+
else:
|
| 1249 |
+
# Fetch new transcript
|
| 1250 |
+
print(f" 🔄 Fetching transcript for video: {video_id}")
|
| 1251 |
+
transcript_data = youtube_tool.get_transcript(youtube_url)
|
| 1252 |
+
if transcript_data["success"]:
|
| 1253 |
+
video_transcripts[cache_key] = transcript_data
|
| 1254 |
+
print(f" ✅ Transcript fetched: {len(transcript_data.get('transcript', ''))} chars")
|
| 1255 |
+
else:
|
| 1256 |
+
print(f" ⚠️ Transcript fetch failed: {transcript_data.get('error')}")
|
| 1257 |
+
|
| 1258 |
+
# Build context for LLM
|
| 1259 |
+
transcript_text = ""
|
| 1260 |
+
if transcript_data and transcript_data.get("success"):
|
| 1261 |
+
# Use clean transcript for context (with timestamps)
|
| 1262 |
+
transcript_text = transcript_data.get("transcript", "")[:8000] # Limit for context window
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1263 |
|
| 1264 |
+
# Generate appropriate prompt based on question type
|
| 1265 |
+
q_lower = q.lower()
|
| 1266 |
+
is_summary = any(word in q_lower for word in ["summarize", "summary", "overview", "main points", "key takeaways", "what is this about"])
|
| 1267 |
+
|
| 1268 |
+
if transcript_text:
|
| 1269 |
+
if is_summary:
|
| 1270 |
+
prompt = f"""You are VIDEO BRAIN AI - an expert at analyzing YouTube videos.
|
| 1271 |
|
| 1272 |
+
VIDEO TRANSCRIPT (with timestamps [MM:SS]):
|
| 1273 |
+
{transcript_text}
|
| 1274 |
|
| 1275 |
+
USER REQUEST: {q}
|
| 1276 |
|
| 1277 |
+
Provide a comprehensive summary with:
|
| 1278 |
+
1. **Overview**: One paragraph describing what the video is about
|
| 1279 |
+
2. **Key Points**: 5-7 main takeaways with timestamps
|
| 1280 |
+
3. **Important Details**: Any specific facts, figures, or examples mentioned
|
| 1281 |
+
4. **Actionable Insights**: What viewers should do or remember
|
| 1282 |
|
| 1283 |
+
Use the actual content from the transcript. Reference timestamps like [5:30] when citing specific parts."""
|
| 1284 |
+
else:
|
| 1285 |
+
prompt = f"""You are VIDEO BRAIN AI - an expert at analyzing YouTube videos.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1286 |
|
| 1287 |
+
VIDEO TRANSCRIPT (with timestamps [MM:SS]):
|
| 1288 |
+
{transcript_text}
|
| 1289 |
|
| 1290 |
+
USER QUESTION: {q}
|
| 1291 |
+
|
| 1292 |
+
Answer the question using ONLY the information from the transcript above.
|
| 1293 |
+
- Be specific and cite timestamps when relevant
|
| 1294 |
+
- If the answer is not in the transcript, say so honestly
|
| 1295 |
+
- Format your response clearly with bullet points if appropriate"""
|
| 1296 |
+
else:
|
| 1297 |
+
# No transcript available - explain why
|
| 1298 |
+
error_msg = transcript_data.get("error", "Unknown error") if transcript_data else "Could not fetch transcript"
|
| 1299 |
+
prompt = f"""The user asked about a YouTube video but I couldn't extract the transcript.
|
| 1300 |
+
|
| 1301 |
+
Video URL: {youtube_url}
|
| 1302 |
+
Error: {error_msg}
|
| 1303 |
+
User Question: {q}
|
| 1304 |
+
|
| 1305 |
+
Explain that:
|
| 1306 |
+
1. The transcript couldn't be fetched (reason: {error_msg})
|
| 1307 |
+
2. Suggest they try:
|
| 1308 |
+
- A different video that has captions/subtitles enabled
|
| 1309 |
+
- Checking if the video is public and available
|
| 1310 |
+
- Using YouTube's built-in transcript feature (click ... > Show transcript)
|
| 1311 |
+
3. Offer to help once they can provide the transcript text manually"""
|
| 1312 |
|
| 1313 |
+
try:
|
| 1314 |
+
msgs = build_context(ws, prompt)
|
| 1315 |
+
answer = llm.invoke(msgs).content
|
| 1316 |
+
except Exception as e:
|
| 1317 |
+
print(f" ❌ LLM error: {e}")
|
| 1318 |
+
answer = f"Error generating response: {str(e)[:100]}"
|
| 1319 |
+
|
| 1320 |
+
# Generate contextual follow-up questions
|
| 1321 |
+
if transcript_text:
|
| 1322 |
+
follow = [
|
| 1323 |
+
"What are the main arguments or points made?",
|
| 1324 |
+
"Summarize this in 3 bullet points",
|
| 1325 |
+
"What examples or case studies are mentioned?",
|
| 1326 |
+
"What should I learn from this video?",
|
| 1327 |
+
"Explain the most complex concept in simple terms"
|
| 1328 |
+
]
|
| 1329 |
+
else:
|
| 1330 |
+
follow = [
|
| 1331 |
+
"Try a different YouTube video",
|
| 1332 |
+
"How do I enable captions on YouTube?",
|
| 1333 |
+
"What videos work best with Video Brain?"
|
| 1334 |
+
]
|
| 1335 |
|
| 1336 |
+
# Build sources
|
| 1337 |
+
sources = [{"title": f"🎥 YouTube Video", "url": youtube_url}]
|
| 1338 |
+
links = [{"title": "Source Video", "url": youtube_url, "snippet": f"Video ID: {video_id}"}]
|
| 1339 |
|
| 1340 |
memory.add(ws, "assistant", answer)
|
| 1341 |
print(f" ✅ Video Brain: Response generated")
|
requirements.txt
CHANGED
|
@@ -44,6 +44,9 @@ trafilatura==1.6.3
|
|
| 44 |
# Wikipedia
|
| 45 |
wikipedia==1.4.0
|
| 46 |
|
|
|
|
|
|
|
|
|
|
| 47 |
# Document processing
|
| 48 |
pypdf==4.0.1
|
| 49 |
python-pptx==0.6.23
|
|
|
|
| 44 |
# Wikipedia
|
| 45 |
wikipedia==1.4.0
|
| 46 |
|
| 47 |
+
# YouTube transcript extraction
|
| 48 |
+
youtube-transcript-api==0.6.2
|
| 49 |
+
|
| 50 |
# Document processing
|
| 51 |
pypdf==4.0.1
|
| 52 |
python-pptx==0.6.23
|
streamlit_app.py
CHANGED
|
@@ -801,6 +801,16 @@ if st.session_state.current_result:
|
|
| 801 |
<h4 style="margin: 0;">🎥 Video Analysis</h4>
|
| 802 |
</div>
|
| 803 |
""", unsafe_allow_html=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 804 |
|
| 805 |
# Query box
|
| 806 |
mode_info = MODES.get(result['mode'], MODES['Automatic'])
|
|
@@ -829,6 +839,31 @@ if st.session_state.current_result:
|
|
| 829 |
# Display answer directly with markdown
|
| 830 |
st.markdown(answer)
|
| 831 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 832 |
followups = data.get("followups", [])
|
| 833 |
if followups:
|
| 834 |
st.markdown("**Related:**")
|
|
|
|
| 801 |
<h4 style="margin: 0;">🎥 Video Analysis</h4>
|
| 802 |
</div>
|
| 803 |
""", unsafe_allow_html=True)
|
| 804 |
+
|
| 805 |
+
# Show embedded video
|
| 806 |
+
if st.session_state.youtube_url:
|
| 807 |
+
video_id = ""
|
| 808 |
+
if "v=" in st.session_state.youtube_url:
|
| 809 |
+
video_id = st.session_state.youtube_url.split("v=")[1].split("&")[0]
|
| 810 |
+
elif "youtu.be/" in st.session_state.youtube_url:
|
| 811 |
+
video_id = st.session_state.youtube_url.split("youtu.be/")[1].split("?")[0]
|
| 812 |
+
if video_id:
|
| 813 |
+
st.video(f"https://www.youtube.com/watch?v={video_id}")
|
| 814 |
|
| 815 |
# Query box
|
| 816 |
mode_info = MODES.get(result['mode'], MODES['Automatic'])
|
|
|
|
| 839 |
# Display answer directly with markdown
|
| 840 |
st.markdown(answer)
|
| 841 |
|
| 842 |
+
# For Video Brain mode, show a follow-up question input
|
| 843 |
+
if result['mode'] == "Video Brain" and st.session_state.youtube_url:
|
| 844 |
+
st.divider()
|
| 845 |
+
st.markdown("**💬 Ask another question about this video:**")
|
| 846 |
+
followup_question = st.text_input(
|
| 847 |
+
"Follow-up question",
|
| 848 |
+
placeholder="e.g., What are the main arguments? Explain the key concept...",
|
| 849 |
+
key="video_followup_input",
|
| 850 |
+
label_visibility="collapsed"
|
| 851 |
+
)
|
| 852 |
+
if st.button("Ask", key="video_followup_btn"):
|
| 853 |
+
if followup_question.strip():
|
| 854 |
+
with st.spinner("Analyzing..."):
|
| 855 |
+
new_result = call_api(
|
| 856 |
+
followup_question.strip(),
|
| 857 |
+
"Video Brain",
|
| 858 |
+
{"youtube_url": st.session_state.youtube_url}
|
| 859 |
+
)
|
| 860 |
+
st.session_state.current_result = {
|
| 861 |
+
"query": followup_question.strip(),
|
| 862 |
+
"mode": "Video Brain",
|
| 863 |
+
"data": new_result
|
| 864 |
+
}
|
| 865 |
+
st.rerun()
|
| 866 |
+
|
| 867 |
followups = data.get("followups", [])
|
| 868 |
if followups:
|
| 869 |
st.markdown("**Related:**")
|
tools/youtube_tool.py
ADDED
|
@@ -0,0 +1,150 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# tools/youtube_tool.py
|
| 2 |
+
"""
|
| 3 |
+
YouTube Transcript Extraction Tool
|
| 4 |
+
Extracts transcripts from YouTube videos for Video Brain mode.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import re
|
| 8 |
+
from typing import Dict, Optional
|
| 9 |
+
from youtube_transcript_api import YouTubeTranscriptApi
|
| 10 |
+
from youtube_transcript_api._errors import (
|
| 11 |
+
TranscriptsDisabled,
|
| 12 |
+
NoTranscriptFound,
|
| 13 |
+
VideoUnavailable
|
| 14 |
+
)
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
class YouTubeTool:
    """Extract transcripts and basic metadata from YouTube videos."""

    # URL shapes that carry an 11-character video ID right after a known marker.
    _ID_IN_URL = re.compile(
        r'(?:v=|/v/|youtu\.be/|/embed/|/shorts/|/live/)([a-zA-Z0-9_-]{11})'
    )
    # A bare video ID: accepted only when it is the *entire* input, so that
    # arbitrary text containing an 11-character run is not mistaken for an ID.
    _BARE_ID = re.compile(r'[a-zA-Z0-9_-]{11}')

    # Language codes tried, in order, before falling back to translation.
    _PREFERRED_LANGUAGES = ['en', 'en-US', 'en-GB']

    def extract_video_id(self, url: str) -> Optional[str]:
        """Return the 11-character video ID found in *url*, or None.

        Recognizes ``watch?v=``, ``/v/``, ``youtu.be/``, ``/embed/``,
        ``/shorts/`` and ``/live/`` URLs, as well as a bare ID passed on its
        own (surrounding whitespace is ignored). Empty or ``None`` input
        yields ``None``.
        """
        if not url:
            return None

        candidate = url.strip()

        match = self._ID_IN_URL.search(candidate)
        if match:
            return match.group(1)

        if self._BARE_ID.fullmatch(candidate):
            return candidate

        return None

    @staticmethod
    def _failure(error: str, video_id: Optional[str]) -> Dict:
        """Build the result dict returned when no transcript could be produced."""
        return {
            "success": False,
            "error": error,
            "transcript": "",
            "clean_transcript": "",
            "segments": [],
            "video_id": video_id,
        }

    @staticmethod
    def _format_timestamp(start_seconds) -> str:
        """Render a segment start offset (seconds) as ``[M:SS]``."""
        minutes, seconds = divmod(int(start_seconds), 60)
        return f"[{minutes}:{seconds:02d}]"

    def _pick_transcript(self, transcript_list):
        """Choose a transcript: English first, then a translation, then anything.

        Returns None when *transcript_list* yields no transcripts at all.
        """
        try:
            return transcript_list.find_transcript(self._PREFERRED_LANGUAGES)
        except Exception:
            # No English transcript (or lookup failed); fall through to the
            # first available transcript instead.
            pass

        available = list(transcript_list)
        if not available:
            return None

        first = available[0]
        try:
            # Prefer an English translation of whatever is available.
            return first.translate('en')
        except Exception:
            # Translation not possible; use the transcript in its own language.
            return first

    def get_transcript(self, video_url: str) -> Dict:
        """
        Get transcript from a YouTube video.

        Returns:
            Dict with keys:
            - success: bool
            - transcript: str (one "[M:SS] text" line per segment)
            - clean_transcript: str (segment texts joined by spaces, no timestamps)
            - segments: list of {text, start, duration}
            - video_id: str (None if no ID could be extracted)
            - error: str (None on success)

        Failures are reported through the returned dict rather than raised.
        """
        video_id = self.extract_video_id(video_url)

        if not video_id:
            return self._failure("Could not extract video ID from URL", None)

        try:
            # Lists both manually created and auto-generated transcripts.
            transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)

            transcript = self._pick_transcript(transcript_list)
            if not transcript:
                return self._failure(
                    "No transcript available for this video", video_id
                )

            segments = transcript.fetch()

            # Timestamped version, one segment per line.
            full_transcript = "\n".join(
                f"{self._format_timestamp(seg['start'])} {seg['text']}"
                for seg in segments
            )

            # Clean version without timestamps.
            clean_text = " ".join([seg['text'] for seg in segments])

            return {
                "success": True,
                "transcript": full_transcript,
                "clean_transcript": clean_text,
                "segments": segments,
                "video_id": video_id,
                "error": None
            }

        except TranscriptsDisabled:
            return self._failure(
                "Transcripts are disabled for this video", video_id
            )
        except NoTranscriptFound:
            return self._failure("No transcript found for this video", video_id)
        except VideoUnavailable:
            return self._failure("Video is unavailable", video_id)
        except Exception as e:
            # Boundary handler: callers expect a result dict, never an exception.
            return self._failure(
                f"Error fetching transcript: {str(e)}", video_id
            )

    def get_video_info(self, video_url: str) -> Dict:
        """Return the video ID, the original URL and an embed URL for *video_url*.

        ``embed_url`` is None when no video ID can be extracted.
        """
        video_id = self.extract_video_id(video_url)
        return {
            "video_id": video_id,
            "url": video_url,
            "embed_url": f"https://www.youtube.com/embed/{video_id}" if video_id else None
        }
|