Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -26,6 +26,8 @@ from PIL import Image
|
|
| 26 |
import base64
|
| 27 |
from googleapiclient.discovery import build
|
| 28 |
from googleapiclient.errors import HttpError
|
|
|
|
|
|
|
| 29 |
|
| 30 |
# LangChain & LangGraph
|
| 31 |
from langgraph.graph.message import add_messages
|
|
@@ -682,93 +684,27 @@ class YoutubeInput(BaseModel):
|
|
| 682 |
@tool(args_schema=YoutubeInput)
|
| 683 |
def get_youtube_transcript(video_url: str) -> str:
|
| 684 |
"""
|
| 685 |
-
Fetches YouTube video transcript
|
| 686 |
-
|
| 687 |
"""
|
| 688 |
-
if not video_url:
|
| 689 |
-
return "Error: Invalid URL."
|
| 690 |
-
|
| 691 |
-
print(f"πΊ YouTube transcript (API v3): {video_url}")
|
| 692 |
-
|
| 693 |
-
# Get API key
|
| 694 |
-
YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY")
|
| 695 |
-
if not YOUTUBE_API_KEY:
|
| 696 |
-
return "Error: YOUTUBE_API_KEY not set in Space secrets."
|
| 697 |
-
|
| 698 |
try:
|
| 699 |
-
#
|
| 700 |
-
|
| 701 |
-
|
| 702 |
-
|
| 703 |
-
|
| 704 |
-
|
| 705 |
-
|
| 706 |
-
|
| 707 |
-
|
| 708 |
-
|
| 709 |
-
|
| 710 |
-
|
| 711 |
-
|
| 712 |
-
|
| 713 |
-
|
| 714 |
-
# Get caption tracks
|
| 715 |
-
captions_response = youtube.captions().list(
|
| 716 |
-
part='snippet',
|
| 717 |
-
videoId=video_id
|
| 718 |
-
).execute()
|
| 719 |
-
|
| 720 |
-
if not captions_response.get('items'):
|
| 721 |
-
return "N/A - No captions available for this video."
|
| 722 |
-
|
| 723 |
-
# Find English caption track
|
| 724 |
-
caption_id = None
|
| 725 |
-
for caption in captions_response['items']:
|
| 726 |
-
lang = caption['snippet'].get('language', '')
|
| 727 |
-
if lang.startswith('en'):
|
| 728 |
-
caption_id = caption['id']
|
| 729 |
-
print(f" Found English captions: {lang}")
|
| 730 |
-
break
|
| 731 |
|
| 732 |
-
if not caption_id:
|
| 733 |
-
# Try first available caption
|
| 734 |
-
caption_id = captions_response['items'][0]['id']
|
| 735 |
-
print(f" Using first available caption track")
|
| 736 |
-
|
| 737 |
-
# Download caption content
|
| 738 |
-
caption_content = youtube.captions().download(
|
| 739 |
-
id=caption_id,
|
| 740 |
-
tfmt='srt' # or 'vtt'
|
| 741 |
-
).execute()
|
| 742 |
-
|
| 743 |
-
# Parse SRT format (remove timestamps and numbers)
|
| 744 |
-
lines = caption_content.decode('utf-8').split('\n')
|
| 745 |
-
transcript_parts = []
|
| 746 |
-
|
| 747 |
-
for line in lines:
|
| 748 |
-
line = line.strip()
|
| 749 |
-
# Skip line numbers, timestamps, and empty lines
|
| 750 |
-
if (line and
|
| 751 |
-
not line.isdigit() and
|
| 752 |
-
'-->' not in line):
|
| 753 |
-
transcript_parts.append(line)
|
| 754 |
-
|
| 755 |
-
full_transcript = ' '.join(transcript_parts)
|
| 756 |
-
|
| 757 |
-
if not full_transcript:
|
| 758 |
-
return "Error: Transcript was empty."
|
| 759 |
-
|
| 760 |
-
print(f"β Transcript retrieved: {len(full_transcript)} chars")
|
| 761 |
-
return f"Transcript:\n{truncate_if_needed(full_transcript)}"
|
| 762 |
-
|
| 763 |
-
except HttpError as e:
|
| 764 |
-
if e.resp.status == 403:
|
| 765 |
-
return "Error: YouTube API quota exceeded or captions are disabled for this video."
|
| 766 |
-
elif e.resp.status == 404:
|
| 767 |
-
return "Error: Video not found or captions not available."
|
| 768 |
-
else:
|
| 769 |
-
return f"YouTube API error: {str(e)}"
|
| 770 |
except Exception as e:
|
| 771 |
-
print(f"β Error: {str(e)}")
|
| 772 |
return f"Error: {str(e)}"
|
| 773 |
|
| 774 |
|
|
|
|
| 26 |
import base64
|
| 27 |
from googleapiclient.discovery import build
|
| 28 |
from googleapiclient.errors import HttpError
|
| 29 |
+
import assemblyai as aai
|
| 30 |
+
|
| 31 |
|
| 32 |
# LangChain & LangGraph
|
| 33 |
from langgraph.graph.message import add_messages
|
|
|
|
| 684 |
@tool(args_schema=YoutubeInput)
def get_youtube_transcript(video_url: str) -> str:
    """
    Fetches YouTube video transcript using AssemblyAI.
    Works reliably on Hugging Face Spaces.
    """
    # NOTE(review): the docstring above is deliberately left short and
    # unchanged -- presumably the @tool decorator exposes it to the model as
    # the tool description; confirm before expanding it.
    #
    # Contract: always returns a string. On success it is the transcript text
    # prefixed with "Transcript:" and a newline; on any failure it is a
    # message starting with "Error:". Nothing is raised to the caller.

    # Reject an empty/None URL up front instead of sending it to the service.
    if not video_url:
        return "Error: Invalid URL."

    # Read the key from the environment (stored in HF Spaces secrets) and
    # fail with a clear message when it is missing, rather than letting the
    # SDK report an opaque authentication error later.
    api_key = os.getenv("ASSEMBLYAI_API_KEY")
    if not api_key:
        return "Error: ASSEMBLYAI_API_KEY not set in Space secrets."

    try:
        aai.settings.api_key = api_key

        print(f"📺 Transcribing: {video_url}")

        # NOTE(review): the URL is handed to AssemblyAI as-is. Confirm that a
        # YouTube watch-page URL is accepted; the service may require a URL
        # that points directly at a media file.
        transcriber = aai.Transcriber()
        transcript = transcriber.transcribe(video_url)

        # The SDK reports a failed job through the status field, not by
        # raising, so it has to be checked explicitly.
        if transcript.status == aai.TranscriptStatus.error:
            return f"Error: {transcript.error}"

        # Guard against a missing/empty text so len() below cannot raise and
        # the caller gets a meaningful message.
        text = transcript.text
        if not text:
            return "Error: Transcript was empty."

        print(f"✅ Transcribed {len(text)} chars")
        # NOTE(review): the previous implementation passed the transcript
        # through truncate_if_needed() before returning it; confirm whether
        # the full, untruncated text is intended here.
        return f"Transcript:\n{text}"

    except Exception as e:
        # Tool boundary: convert any SDK/network failure into an error string
        # so the calling agent receives a normal tool result.
        return f"Error: {str(e)}"
|
| 709 |
|
| 710 |
|