gabejavitt committed on
Commit
fac3e4d
·
verified ·
1 Parent(s): a24d26b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -83
app.py CHANGED
@@ -26,6 +26,8 @@ from PIL import Image
26
  import base64
27
  from googleapiclient.discovery import build
28
  from googleapiclient.errors import HttpError
 
 
29
 
30
  # LangChain & LangGraph
31
  from langgraph.graph.message import add_messages
@@ -682,93 +684,27 @@ class YoutubeInput(BaseModel):
682
  @tool(args_schema=YoutubeInput)
683
  def get_youtube_transcript(video_url: str) -> str:
684
  """
685
- Fetches YouTube video transcript/captions using YouTube Data API v3.
686
- Much more reliable than yt-dlp on cloud environments.
687
  """
688
- if not video_url:
689
- return "Error: Invalid URL."
690
-
691
- print(f"📺 YouTube transcript (API v3): {video_url}")
692
-
693
- # Get API key
694
- YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY")
695
- if not YOUTUBE_API_KEY:
696
- return "Error: YOUTUBE_API_KEY not set in Space secrets."
697
-
698
  try:
699
- # Extract video ID
700
- video_id = None
701
- if "watch?v=" in video_url:
702
- video_id = video_url.split("v=")[1].split("&")[0]
703
- elif "youtu.be/" in video_url:
704
- video_id = video_url.split("youtu.be/")[1].split("?")[0]
705
-
706
- if not video_id:
707
- return "Error: Could not extract video ID from URL."
708
-
709
- print(f" Video ID: {video_id}")
710
-
711
- # Initialize YouTube API
712
- youtube = build('youtube', 'v3', developerKey=YOUTUBE_API_KEY)
713
-
714
- # Get caption tracks
715
- captions_response = youtube.captions().list(
716
- part='snippet',
717
- videoId=video_id
718
- ).execute()
719
-
720
- if not captions_response.get('items'):
721
- return "N/A - No captions available for this video."
722
-
723
- # Find English caption track
724
- caption_id = None
725
- for caption in captions_response['items']:
726
- lang = caption['snippet'].get('language', '')
727
- if lang.startswith('en'):
728
- caption_id = caption['id']
729
- print(f" Found English captions: {lang}")
730
- break
731
 
732
- if not caption_id:
733
- # Try first available caption
734
- caption_id = captions_response['items'][0]['id']
735
- print(f" Using first available caption track")
736
-
737
- # Download caption content
738
- caption_content = youtube.captions().download(
739
- id=caption_id,
740
- tfmt='srt' # or 'vtt'
741
- ).execute()
742
-
743
- # Parse SRT format (remove timestamps and numbers)
744
- lines = caption_content.decode('utf-8').split('\n')
745
- transcript_parts = []
746
-
747
- for line in lines:
748
- line = line.strip()
749
- # Skip line numbers, timestamps, and empty lines
750
- if (line and
751
- not line.isdigit() and
752
- '-->' not in line):
753
- transcript_parts.append(line)
754
-
755
- full_transcript = ' '.join(transcript_parts)
756
-
757
- if not full_transcript:
758
- return "Error: Transcript was empty."
759
-
760
- print(f"✓ Transcript retrieved: {len(full_transcript)} chars")
761
- return f"Transcript:\n{truncate_if_needed(full_transcript)}"
762
-
763
- except HttpError as e:
764
- if e.resp.status == 403:
765
- return "Error: YouTube API quota exceeded or captions are disabled for this video."
766
- elif e.resp.status == 404:
767
- return "Error: Video not found or captions not available."
768
- else:
769
- return f"YouTube API error: {str(e)}"
770
  except Exception as e:
771
- print(f"❌ Error: {str(e)}")
772
  return f"Error: {str(e)}"
773
 
774
 
 
26
  import base64
27
  from googleapiclient.discovery import build
28
  from googleapiclient.errors import HttpError
29
+ import assemblyai as aai
30
+
31
 
32
  # LangChain & LangGraph
33
  from langgraph.graph.message import add_messages
 
684
  @tool(args_schema=YoutubeInput)
685
  def get_youtube_transcript(video_url: str) -> str:
686
  """
687
+ Fetches YouTube video transcript using AssemblyAI.
688
+ Works reliably on Hugging Face Spaces.
689
  """
 
 
 
 
 
 
 
 
 
 
690
  try:
691
+ # Set API key (store in HF Spaces secrets)
692
+ aai.settings.api_key = os.getenv("ASSEMBLYAI_API_KEY")
693
+
694
+ print(f"📺 Transcribing: {video_url}")
695
+
696
+ # Transcribe directly from YouTube URL
697
+ transcriber = aai.Transcriber()
698
+ transcript = transcriber.transcribe(video_url)
699
+
700
+ # Wait for transcription
701
+ if transcript.status == aai.TranscriptStatus.error:
702
+ return f"Error: {transcript.error}"
703
+
704
+ print(f"✓ Transcribed {len(transcript.text)} chars")
705
+ return f"Transcript:\n{transcript.text}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
706
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
707
  except Exception as e:
 
708
  return f"Error: {str(e)}"
709
 
710