Spaces:
Sleeping
Sleeping
| import re | |
| import requests | |
| from bs4 import BeautifulSoup | |
def youtube_transcript(url: str) -> str:
    """Return a YouTube video's transcript, falling back to its page description.

    The video ID is taken from ``url`` when it contains a ``v=``,
    ``youtu.be/``, ``/shorts/`` or ``/embed/`` marker followed by an
    11-character ID; otherwise the stripped input itself is used as the ID.

    Args:
        url: A YouTube URL (with or without scheme) or a bare video ID.

    Returns:
        One of:
        - ``"Transcript:\\n<text>"`` (text truncated to 8000 characters),
        - ``"Video description: <text>"`` (truncated to 4000 characters),
        - ``"Video has no transcript or description."`` when the page was
          fetched but carries no description meta tag,
        - ``"Video information unavailable."`` when nothing could be fetched.

    This function is best-effort: lookup failures are logged at debug level
    and reported through the return value rather than raised.
    """
    import logging

    log = logging.getLogger(__name__)
    unavailable = "Video information unavailable."

    # Nothing to look up: skip both network round-trips.
    if not url or not url.strip():
        return unavailable
    url = url.strip()

    match = re.search(r"(?:v=|youtu\.be/|/shorts/|/embed/)([0-9A-Za-z_-]{11})", url)
    video_id = match.group(1) if match else url

    # First choice: the transcript (captions).
    try:
        from youtube_transcript_api import YouTubeTranscriptApi

        # The library is used through whichever entry point it exposes:
        # the class-level ``get_transcript`` or the instance-level ``fetch``.
        if hasattr(YouTubeTranscriptApi, "get_transcript"):
            chunks = YouTubeTranscriptApi.get_transcript(video_id)
        else:
            fetched = YouTubeTranscriptApi().fetch(video_id)
            chunks = [{"text": snippet.text} for snippet in fetched]
        transcript = " ".join(c["text"] for c in chunks)
        if transcript.strip():
            return f"Transcript:\n{transcript[:8000]}"
    except Exception:
        # Deliberately best-effort: any failure falls through to the
        # description fallback, but the reason is kept for debugging.
        log.debug("Transcript lookup failed for %r", video_id, exc_info=True)

    # Fallback: the video page's description meta tag.
    # The input may be a bare ID or a scheme-less link, which cannot be
    # requested as-is; rebuild a canonical watch URL from the ID instead.
    if re.match(r"https?://", url, re.IGNORECASE):
        page_url = url
    elif re.fullmatch(r"[0-9A-Za-z_-]{11}", video_id):
        page_url = f"https://www.youtube.com/watch?v={video_id}"
    else:
        # Neither a fetchable URL nor a well-formed video ID.
        return unavailable

    try:
        headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"}
        resp = requests.get(page_url, headers=headers, timeout=15)
        resp.raise_for_status()
        soup = BeautifulSoup(resp.text, "html.parser")
        desc_meta = soup.find("meta", {"name": "description"})
        if desc_meta and desc_meta.get("content"):
            description = desc_meta["content"]
            return f"Video description: {description[:4000]}"
        return "Video has no transcript or description."
    except Exception:
        log.debug("Description lookup failed for %r", page_url, exc_info=True)
        return unavailable