Spaces:
Sleeping
Sleeping
Luigi D'Addona committed on
Commit ·
ffe7776
1
Parent(s): 1a64e3b
il tool get_youtube_transcript() ora restituisce un dict con i campi "transcript" e "metadata"
Browse files
tools.py
CHANGED
|
@@ -318,13 +318,27 @@ def arxiv_search(query: str) -> str:
|
|
| 318 |
|
| 319 |
|
| 320 |
@tool
|
| 321 |
-
def get_youtube_transcript(url: str) ->
|
| 322 |
-
"""
|
| 323 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 324 |
"""
|
| 325 |
-
|
| 326 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 327 |
|
| 328 |
-
|
| 329 |
-
|
| 330 |
-
|
|
|
|
|
|
|
|
|
|
|
|
| 318 |
|
| 319 |
|
| 320 |
@tool
def get_youtube_transcript(url: str) -> dict:
    """Fetches the transcript from a YouTube video URL.

    Args:
        url: The URL of the YouTube video.

    Returns:
        A dictionary containing the transcript and metadata.
        The dictionary will have keys "transcript" (string, the video transcript or an error message) and "metadata" (dictionary, containing video title and other information, if available, otherwise empty).
    """
    # NOTE(review): the docstring above is presumably consumed by the @tool
    # decorator as the tool description shown to the model, so its wording is
    # intentionally left unchanged -- confirm before rephrasing it.
    try:
        loader = YoutubeLoader.from_youtube_url(url, add_video_info=True)
        docs = loader.load()

        if not docs:
            # The loader may hand back no documents instead of raising
            # (LangChain's YoutubeLoader is understood to do so when
            # transcripts are disabled -- verify for the installed version).
            # Report that explicitly rather than returning an empty string,
            # which callers cannot tell apart from a real (empty) transcript.
            return {
                "transcript": "No transcript available for this video.",
                "metadata": {},
            }

        # Combine all transcript chunks into a single string.
        transcript = "\n".join(doc.page_content for doc in docs)

        # Only the first chunk's metadata is returned.
        # NOTE(review): assumes every chunk describes the same video -- confirm.
        return {"transcript": transcript, "metadata": docs[0].metadata}
    except Exception as e:
        # Deliberate catch-all: the function always returns the documented
        # dict shape, turning any failure into a message instead of raising.
        if "Could not retrieve transcript" in str(e):
            return {
                "transcript": "No transcript available for this video.",
                "metadata": {},
            }
        return {"transcript": f"Error fetching transcript: {e}", "metadata": {}}
|