Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -23,6 +23,8 @@ from bs4 import BeautifulSoup
|
|
| 23 |
import requests
|
| 24 |
from PIL import Image
|
| 25 |
import base64
|
|
|
|
|
|
|
| 26 |
|
| 27 |
# LangChain & LangGraph
|
| 28 |
from langgraph.graph.message import add_messages
|
|
@@ -555,13 +557,14 @@ class YoutubeInput(BaseModel):
|
|
| 555 |
|
| 556 |
@tool(args_schema=YoutubeInput)
|
| 557 |
def get_youtube_transcript(video_url: str) -> str:
|
| 558 |
-
"""Fetches YouTube video transcript."""
|
| 559 |
if not video_url:
|
| 560 |
return "Error: Invalid URL."
|
| 561 |
|
| 562 |
print(f"📺 YouTube transcript: {video_url}")
|
| 563 |
|
| 564 |
try:
|
|
|
|
| 565 |
video_id = None
|
| 566 |
if "watch?v=" in video_url:
|
| 567 |
video_id = video_url.split("v=")[1].split("&")[0]
|
|
@@ -570,15 +573,54 @@ def get_youtube_transcript(video_url: str) -> str:
|
|
| 570 |
|
| 571 |
if not video_id:
|
| 572 |
return f"Error: Could not extract video ID."
|
| 573 |
-
|
| 574 |
-
# FIXED: Use get_transcript instead of list_transcripts
|
| 575 |
-
transcript_list = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'])
|
| 576 |
|
| 577 |
-
|
| 578 |
-
|
| 579 |
-
|
| 580 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 581 |
return f"Transcript:\n{truncate_if_needed(full_transcript)}"
|
|
|
|
|
|
|
|
|
|
| 582 |
except Exception as e:
|
| 583 |
return f"Transcript error: {str(e)}"
|
| 584 |
|
|
|
|
| 23 |
import requests
|
| 24 |
from PIL import Image
|
| 25 |
import base64
|
| 26 |
+
from googleapiclient.discovery import build
|
| 27 |
+
from googleapiclient.errors import HttpError
|
| 28 |
|
| 29 |
# LangChain & LangGraph
|
| 30 |
from langgraph.graph.message import add_messages
|
|
|
|
| 557 |
|
| 558 |
@tool(args_schema=YoutubeInput)
|
| 559 |
def get_youtube_transcript(video_url: str) -> str:
|
| 560 |
+
"""Fetches YouTube video transcript using official API."""
|
| 561 |
if not video_url:
|
| 562 |
return "Error: Invalid URL."
|
| 563 |
|
| 564 |
print(f"📺 YouTube transcript: {video_url}")
|
| 565 |
|
| 566 |
try:
|
| 567 |
+
# Extract video ID
|
| 568 |
video_id = None
|
| 569 |
if "watch?v=" in video_url:
|
| 570 |
video_id = video_url.split("v=")[1].split("&")[0]
|
|
|
|
| 573 |
|
| 574 |
if not video_id:
|
| 575 |
return f"Error: Could not extract video ID."
|
|
|
|
|
|
|
|
|
|
| 576 |
|
| 577 |
+
# Get API key
|
| 578 |
+
YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY")
|
| 579 |
+
if not YOUTUBE_API_KEY:
|
| 580 |
+
return "Error: YOUTUBE_API_KEY not set in environment."
|
| 581 |
+
|
| 582 |
+
# Build YouTube API client
|
| 583 |
+
youtube = build('youtube', 'v3', developerKey=YOUTUBE_API_KEY)
|
| 584 |
+
|
| 585 |
+
# Get captions list
|
| 586 |
+
captions_response = youtube.captions().list(
|
| 587 |
+
part='snippet',
|
| 588 |
+
videoId=video_id
|
| 589 |
+
).execute()
|
| 590 |
+
|
| 591 |
+
if not captions_response.get('items'):
|
| 592 |
+
return "Error: No captions available for this video."
|
| 593 |
+
|
| 594 |
+
# Find English caption track
|
| 595 |
+
caption_id = None
|
| 596 |
+
for caption in captions_response['items']:
|
| 597 |
+
if caption['snippet']['language'] == 'en':
|
| 598 |
+
caption_id = caption['id']
|
| 599 |
+
break
|
| 600 |
+
|
| 601 |
+
if not caption_id:
|
| 602 |
+
# Try first available caption
|
| 603 |
+
caption_id = captions_response['items'][0]['id']
|
| 604 |
+
|
| 605 |
+
# Download caption
|
| 606 |
+
caption_download = youtube.captions().download(
|
| 607 |
+
id=caption_id,
|
| 608 |
+
tfmt='srt' # or 'vtt'
|
| 609 |
+
).execute()
|
| 610 |
+
|
| 611 |
+
# Parse SRT format to plain text
|
| 612 |
+
import re
|
| 613 |
+
text_lines = []
|
| 614 |
+
for line in caption_download.decode('utf-8').split('\n'):
|
| 615 |
+
# Skip timestamp lines and sequence numbers
|
| 616 |
+
if not re.match(r'^\d+$', line) and not re.match(r'\d{2}:\d{2}:\d{2}', line) and line.strip():
|
| 617 |
+
text_lines.append(line.strip())
|
| 618 |
+
|
| 619 |
+
full_transcript = " ".join(text_lines)
|
| 620 |
return f"Transcript:\n{truncate_if_needed(full_transcript)}"
|
| 621 |
+
|
| 622 |
+
except HttpError as e:
|
| 623 |
+
return f"YouTube API error: {e}"
|
| 624 |
except Exception as e:
|
| 625 |
return f"Transcript error: {str(e)}"
|
| 626 |
|