Spaces:
Running
Running
| import re | |
| from youtube_transcript_api import YouTubeTranscriptApi | |
| from logging_config import logger | |
# Matches the marker that precedes a video ID in the common YouTube URL
# shapes (watch?v=, /shorts/, /embed/, /live/, youtu.be/) and captures the ID.
# ``\bv=`` keeps parameters that merely end in "v" (e.g. ``rev=``) from being
# mistaken for the ``v`` parameter; the capture stops at query/fragment
# delimiters, path separators, and whitespace.
_VIDEO_ID_PATTERN = re.compile(
    r"(?:\bv=|/shorts/|/embed/|/live/|\.be/)([^&?#/\s]+)"
)


def get_video_id(youtube_url: str) -> str:
    """Extract the video ID from a YouTube URL.

    Supported URL shapes include ``watch?v=<id>``, ``/shorts/<id>``,
    ``/embed/<id>``, ``/live/<id>`` and short ``youtu.be/<id>`` links.
    Trailing query parameters, fragments, slashes and whitespace are not
    part of the returned ID.

    Args:
        youtube_url: The URL (or URL fragment) to inspect.

    Returns:
        The video ID found in the URL.

    Raises:
        ValueError: If no video ID can be located in ``youtube_url``.
    """
    match = _VIDEO_ID_PATTERN.search(youtube_url)
    if match is None:
        raise ValueError("Could not extract video ID from the provided URL.")
    return match.group(1)
def fetch_youtube_transcript(youtube_url: str) -> str:
    """Fetch the transcript of a YouTube video as one space-joined string.

    The video ID is extracted with ``get_video_id`` and the transcript is
    requested through ``YouTubeTranscriptApi``. The text of every transcript
    entry is joined with single spaces.

    Args:
        youtube_url: URL of the video whose transcript is wanted.

    Returns:
        The transcript text on success. On any failure (including a URL from
        which no video ID can be extracted) the error is logged and a string
        of the form ``"Error fetching transcript: <details>"`` is returned
        instead of raising.
    """
    try:
        video_id = get_video_id(youtube_url)
        logger.info(f"Fetching official YouTube transcript for video ID: {video_id}")

        # Older youtube-transcript-api releases expose a static
        # ``get_transcript``; newer releases replace it with an instance
        # ``fetch`` whose result converts to the same list-of-dicts shape via
        # ``to_raw_data()``. Prefer the legacy call when present so behavior
        # on existing installs is unchanged.
        legacy_get_transcript = getattr(YouTubeTranscriptApi, "get_transcript", None)
        if legacy_get_transcript is not None:
            transcript_data = legacy_get_transcript(video_id)
        else:
            transcript_data = YouTubeTranscriptApi().fetch(video_id).to_raw_data()

        return " ".join(entry["text"] for entry in transcript_data)
    except Exception as e:
        # Deliberately broad: callers receive the failure as the returned
        # string rather than as an exception.
        err_msg = f"Error fetching transcript: {str(e)}"
        logger.error(err_msg)
        return err_msg