Spaces:
Sleeping
Sleeping
Update tool.py
Browse files
tool.py
CHANGED
|
@@ -6,14 +6,16 @@ from langchain_community.document_loaders.arxiv import ArxivLoader
|
|
| 6 |
from langchain_community.document_loaders.pubmed import PubMedLoader
|
| 7 |
from typing import Optional
|
| 8 |
|
|
|
|
| 9 |
import os
|
| 10 |
import tempfile
|
| 11 |
import requests
|
| 12 |
-
from urllib.parse import urlparse
|
| 13 |
import pytesseract
|
| 14 |
from PIL import Image
|
| 15 |
import pandas as pd
|
| 16 |
import uuid
|
|
|
|
| 17 |
|
| 18 |
## Simple algebra tools
|
| 19 |
@tool
|
|
@@ -257,4 +259,44 @@ def analyze_excel_file(file_path: str, query: str) -> str:
|
|
| 257 |
return result
|
| 258 |
|
| 259 |
except Exception as e:
|
| 260 |
-
return f"Error analyzing Excel file: {str(e)}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
from langchain_community.document_loaders.pubmed import PubMedLoader
|
| 7 |
from typing import Optional
|
| 8 |
|
| 9 |
+
|
| 10 |
import os
|
| 11 |
import tempfile
|
| 12 |
import requests
|
| 13 |
+
from urllib.parse import urlparse, parse_qs
|
| 14 |
import pytesseract
|
| 15 |
from PIL import Image
|
| 16 |
import pandas as pd
|
| 17 |
import uuid
|
| 18 |
+
from youtube_transcript_api import YouTubeTranscriptApi
|
| 19 |
|
| 20 |
## Simple algebra tools
|
| 21 |
@tool
|
|
|
|
| 259 |
return result
|
| 260 |
|
| 261 |
except Exception as e:
|
| 262 |
+
return f"Error analyzing Excel file: {str(e)}"
|
| 263 |
+
|
| 264 |
+
|
| 265 |
+
## Analyze YouTube transcript tools
|
| 266 |
+
|
| 267 |
+
def extract_video_id(youtube_url: str) -> Optional[str]:
    """Extract the video ID from a YouTube URL.

    Supported formats (with or without a leading ``https://``):
    - https://www.youtube.com/watch?v=VIDEO_ID (any ``*.youtube.com`` host,
      e.g. ``m.youtube.com`` or ``music.youtube.com``)
    - https://www.youtube.com/shorts/VIDEO_ID
    - https://www.youtube.com/embed/VIDEO_ID
    - https://www.youtube.com/live/VIDEO_ID
    - https://www.youtube.com/v/VIDEO_ID
    - https://youtu.be/VIDEO_ID

    Args:
        youtube_url: The URL to inspect.

    Returns:
        The video ID, or ``None`` when the input is not a string, cannot be
        parsed, is not a YouTube host, or carries no video ID.
    """
    if not isinstance(youtube_url, str):
        return None

    candidate = youtube_url.strip()
    if not candidate:
        return None

    try:
        parsed_url = urlparse(candidate)
        if not parsed_url.netloc:
            # Without a scheme, urlparse puts the host into the path;
            # re-parse with an explicit scheme so the hostname is recognised.
            parsed_url = urlparse("https://" + candidate.lstrip("/"))
        host = parsed_url.hostname
    except ValueError:
        # urlparse rejects malformed network locations (e.g. a broken "[").
        return None

    if not host:
        return None

    # Non-empty path segments, e.g. "/shorts/ID/" -> ["shorts", "ID"].
    segments = [part for part in parsed_url.path.split("/") if part]

    if host == "youtu.be":
        # Short links carry the ID as the first path segment.
        return segments[0] if segments else None

    if host == "youtube.com" or host.endswith(".youtube.com"):
        video_id = parse_qs(parsed_url.query).get("v", [None])[0]
        if video_id:
            return video_id
        # Path-style links: /shorts/ID, /embed/ID, /live/ID, /v/ID.
        if len(segments) >= 2 and segments[0] in ("shorts", "embed", "live", "v"):
            return segments[1]

    return None
|
| 286 |
+
|
| 287 |
+
# NOTE(review): `tool` is presumably LangChain's decorator, which would use the
# docstring below as the tool description shown to the agent. The docstring is
# therefore left unchanged; confirm before rewording it.
@tool
def get_youtube_transcript(youtube_url: str) -> str:
    """Returns the transcript of a YouTube video as plain text.

    Use this tool to extract spoken words from videos for Q&A, summarization,
    or analysis. This does not include visual or on-screen content.
    """
    video_id = extract_video_id(youtube_url)
    if not video_id:
        return "Invalid or unsupported YouTube URL format."

    try:
        if hasattr(YouTubeTranscriptApi, "fetch"):
            # Newer youtube-transcript-api releases expose an instance-level
            # `fetch` whose snippets carry the caption in a `.text` attribute.
            snippets = YouTubeTranscriptApi().fetch(video_id)
            texts = [snippet.text for snippet in snippets]
        else:
            # Legacy releases only offer the static `get_transcript`, which
            # returns a list of dicts with a "text" key.
            entries = YouTubeTranscriptApi.get_transcript(video_id)
            texts = [entry["text"] for entry in entries]
        return " ".join(texts)
    except Exception as e:
        # Tool boundary: report the failure as text, never raise to the caller.
        return f"Transcript unavailable: {str(e)}"
|