import logging
import re

import requests
from bs4 import BeautifulSoup

logger = logging.getLogger(__name__)

# Matches the 11-character video ID in watch, youtu.be, shorts and embed URLs.
_VIDEO_ID_RE = re.compile(r"(?:v=|youtu\.be/|/shorts/|/embed/)([0-9A-Za-z_-]{11})")
# Shape of a bare video ID (same character class and length as above).
_BARE_ID_RE = re.compile(r"[0-9A-Za-z_-]{11}")

_UNAVAILABLE = "Video information unavailable."
_USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"


def _extract_video_id(url: str) -> str:
    """Return the video ID found in *url*, or *url* itself if none is found."""
    match = _VIDEO_ID_RE.search(url)
    return match.group(1) if match else url


def _fetch_transcript_text(video_id: str) -> str:
    """Return the transcript of *video_id* as one space-joined string."""
    # Imported lazily: youtube_transcript_api is an optional dependency and
    # its absence must only disable the transcript path, not the module.
    from youtube_transcript_api import YouTubeTranscriptApi

    if hasattr(YouTubeTranscriptApi, "get_transcript"):
        # Older API: class-level call returning a list of dicts.
        chunks = YouTubeTranscriptApi.get_transcript(video_id)
        texts = [chunk["text"] for chunk in chunks]
    else:
        # Newer API: instance-level fetch() yielding snippet objects.
        fetched = YouTubeTranscriptApi().fetch(video_id)
        texts = [snippet.text for snippet in fetched]
    return " ".join(texts)


def _page_url(url: str, video_id: str) -> str:
    """Return a URL that ``requests`` can fetch for the given input."""
    if url.lower().startswith(("http://", "https://")):
        return url
    if _BARE_ID_RE.fullmatch(video_id):
        # The caller passed a bare ID or a scheme-less URL; requesting it
        # verbatim would fail, so build the canonical watch URL instead.
        return f"https://www.youtube.com/watch?v={video_id}"
    return url


def _fetch_description(page_url: str) -> str:
    """Return the content of the page's description meta tag, or ``""``."""
    headers = {"User-Agent": _USER_AGENT}
    resp = requests.get(page_url, headers=headers, timeout=15)
    resp.raise_for_status()
    soup = BeautifulSoup(resp.text, "html.parser")
    desc_meta = soup.find("meta", {"name": "description"})
    if desc_meta and desc_meta.get("content"):
        return desc_meta["content"]
    return ""


def youtube_transcript(url: str) -> str:
    """Return the transcript of a YouTube video, or its description.

    The transcript is tried first; if it cannot be obtained or is blank,
    the description meta tag of the video page is used instead. This
    function is best-effort and reports failures as text rather than
    raising.

    Args:
        url: A YouTube URL (watch, youtu.be, shorts or embed form) or a
            bare 11-character video ID.

    Returns:
        ``"Transcript:\\n<text>"`` (text capped at 8000 characters),
        ``"Video description: <text>"`` (text capped at 4000 characters),
        ``"Video has no transcript or description."`` when the page was
        fetched but has no description, or
        ``"Video information unavailable."`` when nothing could be fetched.
    """
    url = url.strip()
    video_id = _extract_video_id(url)
    if not video_id:
        # Nothing to look up; skip the network round-trips entirely.
        return _UNAVAILABLE

    # First choice: the transcript.
    try:
        transcript = _fetch_transcript_text(video_id)
    except Exception:
        # Best-effort: a missing library, disabled captions or a network
        # error all fall through to the description, but leave a trace.
        logger.debug("Transcript fetch failed for %r", video_id, exc_info=True)
    else:
        if transcript.strip():
            return f"Transcript:\n{transcript[:8000]}"

    # Fallback: the video description from the page's meta tag.
    try:
        description = _fetch_description(_page_url(url, video_id))
    except Exception:
        logger.debug("Description fetch failed for %r", url, exc_info=True)
        return _UNAVAILABLE

    if description:
        return f"Video description: {description[:4000]}"
    return "Video has no transcript or description."